; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
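; Each test below calls a legacy masked intrinsic with an all-ones mask, a
; merge mask and (where applicable) a zeroing mask, then combines the results
; so that all three lowerings stay live in the checked assembly.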
declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastd %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc0]
; X86-NEXT: vpbroadcastd %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd0]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT: vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86: # %bb.0:
; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc1]
; X86-NEXT: vpbroadcastq %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc9]
; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT: vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastd %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xc8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc0]
; X86-NEXT: vpbroadcastd %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd0]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT: vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86: # %bb.0:
; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc1]
; X86-NEXT: vpbroadcastq %xmm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc9]
; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT: vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT: vpaddd (%eax){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x08]
; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT: vpaddd (%rsi){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X86-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

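; The tests below switch from the integer broadcast intrinsics to the legacy
; masked floating-point broadcast, duplicate-lane and permute intrinsics.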
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_256:
; X86: # %bb.0:
; X86-NEXT: vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X86-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X86-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X64-NEXT: vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_256:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X86-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X86-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_256:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X64-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X64-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_128:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X86-NEXT: vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X86-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_128:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X64-NEXT: vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X86: # %bb.0:
; X86-NEXT: vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X86-NEXT: # xmm2 = xmm0[0,0,2,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2]
; X86-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X64: # %bb.0:
; X64-NEXT: vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X64-NEXT: # xmm2 = xmm0[0,0,2,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[0,0,2,2]
; X64-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X86: # %bb.0:
; X86-NEXT: vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X86-NEXT: # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X64: # %bb.0:
; X64-NEXT: vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X64-NEXT: # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X86: # %bb.0:
; X86-NEXT: vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X86-NEXT: # xmm2 = xmm0[1,1,3,3]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3]
; X86-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X64: # %bb.0:
; X64-NEXT: vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X64-NEXT: # xmm2 = xmm0[1,1,3,3]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[1,1,3,3]
; X64-NEXT: vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X86: # %bb.0:
; X86-NEXT: vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X86-NEXT: # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X64: # %bb.0:
; X64-NEXT: vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X64-NEXT: # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_128:
; X86: # %bb.0:
; X86-NEXT: vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X86-NEXT: # xmm2 = xmm0[0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X86-NEXT: # xmm1 {%k1} = xmm0[0,0]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_128:
; X64: # %bb.0:
; X64-NEXT: vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X64-NEXT: # xmm2 = xmm0[0,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X64-NEXT: # xmm1 {%k1} = xmm0[0,0]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_256:
; X86: # %bb.0:
; X86-NEXT: vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X86-NEXT: # ymm2 = ymm0[0,0,2,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X86-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_256:
; X64: # %bb.0:
; X64-NEXT: vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X64-NEXT: # ymm2 = ymm0[0,0,2,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,0,2,2]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X64-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X86: # %bb.0:
; X86-NEXT: vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X86-NEXT: # ymm2 = ymm0[0,1,3,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X86-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2]
; X86-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X64: # %bb.0:
; X64-NEXT: vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X64-NEXT: # ymm2 = ymm0[0,1,3,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X64-NEXT: # ymm1 {%k1} = ymm0[0,1,3,2]
; X64-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X86: # %bb.0:
; X86-NEXT: vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X86-NEXT: # xmm2 = xmm0[1,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X86-NEXT: # xmm1 {%k1} = xmm0[1,0]
; X86-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1,0]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X64: # %bb.0:
; X64-NEXT: vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X64-NEXT: # xmm2 = xmm0[1,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X64-NEXT: # xmm1 {%k1} = xmm0[1,0]
; X64-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1,0]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X86: # %bb.0:
; X86-NEXT: vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X86-NEXT: # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X86-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X64: # %bb.0:
; X64-NEXT: vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X64-NEXT: # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X64-NEXT: # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X86: # %bb.0:
; X86-NEXT: vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X86-NEXT: # xmm2 = xmm0[2,1,1,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X86-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0]
; X86-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X64: # %bb.0:
; X64-NEXT: vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X64-NEXT: # xmm2 = xmm0[2,1,1,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X64-NEXT: # xmm1 {%k1} = xmm0[2,1,1,0]
; X64-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X86: # %bb.0:
; X86-NEXT: vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X86-NEXT: # ymm2 = ymm0[3,0,0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X64: # %bb.0:
; X64-NEXT: vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X64-NEXT: # ymm2 = ymm0[3,0,0,0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86: # %bb.0:
; X86-NEXT: vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X86-NEXT: # ymm2 = ymm0[3,0,0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64: # %bb.0:
; X64-NEXT: vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X64-NEXT: # ymm2 = ymm0[3,0,0,0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

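; The store tests below write the vector twice: once with the supplied mask
; (masked vmovap*/vmovup*) and once with an all-ones mask, which lowers to the
; plain EVEX-to-VEX-compressed store.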
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT: vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT: vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT: vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT: vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01]
; X86-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; X64-NEXT: vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

900 declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)
902 define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
903 ; X86-LABEL: test_int_x86_avx512_mask_store_ps_128:
905 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
906 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
907 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
908 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
909 ; X86-NEXT: vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01]
910 ; X86-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
911 ; X86-NEXT: retl # encoding: [0xc3]
913 ; X64-LABEL: test_int_x86_avx512_mask_store_ps_128:
915 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
916 ; X64-NEXT: vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
917 ; X64-NEXT: vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06]
918 ; X64-NEXT: retq # encoding: [0xc3]
919 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
920 call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
ret void
}
924 declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)
926 define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
927 ; X86-LABEL: test_int_x86_avx512_mask_store_ps_256:
929 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
930 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
931 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
932 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
933 ; X86-NEXT: vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01]
934 ; X86-NEXT: vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00]
935 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
936 ; X86-NEXT: retl # encoding: [0xc3]
938 ; X64-LABEL: test_int_x86_avx512_mask_store_ps_256:
940 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
941 ; X64-NEXT: vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
942 ; X64-NEXT: vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06]
943 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
944 ; X64-NEXT: retq # encoding: [0xc3]
945 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
946 call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
ret void
}
950 declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)
952 define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
953 ; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
955 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
957 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
958 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
959 ; X86-NEXT: vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01]
960 ; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
961 ; X86-NEXT: retl # encoding: [0xc3]
963 ; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
965 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
966 ; X64-NEXT: vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
967 ; X64-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06]
968 ; X64-NEXT: retq # encoding: [0xc3]
969 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
970 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
ret void
}
974 declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)
976 define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
977 ; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
979 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
980 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
981 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
982 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
983 ; X86-NEXT: vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01]
984 ; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
985 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
986 ; X86-NEXT: retl # encoding: [0xc3]
988 ; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
990 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
991 ; X64-NEXT: vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
992 ; X64-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06]
993 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
994 ; X64-NEXT: retq # encoding: [0xc3]
995 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
996 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
ret void
}
1000 declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)
1002 define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
1003 ; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128:
1005 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1006 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1007 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1008 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1009 ; X86-NEXT: vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01]
1010 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
1011 ; X86-NEXT: retl # encoding: [0xc3]
1013 ; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128:
1015 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1016 ; X64-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
1017 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
1018 ; X64-NEXT: retq # encoding: [0xc3]
1019 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
1020 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
ret void
}
1024 declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)
1026 define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
1027 ; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256:
1029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1030 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1032 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1033 ; X86-NEXT: vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01]
1034 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
1035 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1036 ; X86-NEXT: retl # encoding: [0xc3]
1038 ; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256:
1040 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1041 ; X64-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
1042 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
1043 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1044 ; X64-NEXT: retq # encoding: [0xc3]
1045 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
1046 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
ret void
}
1050 declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)
1052 define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
1053 ; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128:
1055 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1056 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1057 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1058 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1059 ; X86-NEXT: vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01]
1060 ; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
1061 ; X86-NEXT: retl # encoding: [0xc3]
1063 ; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128:
1065 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1066 ; X64-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
1067 ; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
1068 ; X64-NEXT: retq # encoding: [0xc3]
1069 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
1070 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
ret void
}
1074 declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)
1076 define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
1077 ; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256:
1079 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1080 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1081 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1082 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1083 ; X86-NEXT: vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01]
1084 ; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
1085 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1086 ; X86-NEXT: retl # encoding: [0xc3]
1088 ; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256:
1090 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1091 ; X64-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
1092 ; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
1093 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1094 ; X64-NEXT: retq # encoding: [0xc3]
1095 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
1096 call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
ret void
}
1100 declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)
1102 define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
1103 ; X86-LABEL: test_int_x86_avx512_mask_store_q_128:
1105 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1106 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1107 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1108 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1109 ; X86-NEXT: vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01]
1110 ; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
1111 ; X86-NEXT: retl # encoding: [0xc3]
1113 ; X64-LABEL: test_int_x86_avx512_mask_store_q_128:
1115 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1116 ; X64-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
1117 ; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
1118 ; X64-NEXT: retq # encoding: [0xc3]
1119 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
1120 call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
ret void
}
1124 declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)
1126 define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
1127 ; X86-LABEL: test_int_x86_avx512_mask_store_q_256:
1129 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1130 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1131 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1132 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1133 ; X86-NEXT: vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01]
1134 ; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
1135 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1136 ; X86-NEXT: retl # encoding: [0xc3]
1138 ; X64-LABEL: test_int_x86_avx512_mask_store_q_256:
1140 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1141 ; X64-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
1142 ; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
1143 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1144 ; X64-NEXT: retq # encoding: [0xc3]
1145 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
1146 call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
ret void
}
1150 declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)
1152 define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
1153 ; X86-LABEL: test_int_x86_avx512_mask_store_d_128:
1155 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1157 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1158 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1159 ; X86-NEXT: vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01]
1160 ; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
1161 ; X86-NEXT: retl # encoding: [0xc3]
1163 ; X64-LABEL: test_int_x86_avx512_mask_store_d_128:
1165 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1166 ; X64-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
1167 ; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
1168 ; X64-NEXT: retq # encoding: [0xc3]
1169 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
1170 call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
ret void
}
1174 declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)
1176 define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
1177 ; X86-LABEL: test_int_x86_avx512_mask_store_d_256:
1179 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1180 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1181 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1182 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1183 ; X86-NEXT: vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01]
1184 ; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
1185 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1186 ; X86-NEXT: retl # encoding: [0xc3]
1188 ; X64-LABEL: test_int_x86_avx512_mask_store_d_256:
1190 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1191 ; X64-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
1192 ; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
1193 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1194 ; X64-NEXT: retq # encoding: [0xc3]
1195 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
1196 call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
ret void
}
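; The masked load tests that follow share one pattern: an unmasked load seeds the
; result, a merge-masked load ({%k1}) blends into that value, a zero-masked load
; ({%k1} {z}) produces a second value, and the two masked results are added so the
; plain, merging, and zeroing forms are all exercised by a single function.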
1200 define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
1201 ; X86-LABEL: test_mask_load_aligned_ps_256:
1203 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1204 ; X86-NEXT: vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00]
1205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1206 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1207 ; X86-NEXT: vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00]
1208 ; X86-NEXT: vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08]
1209 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1210 ; X86-NEXT: retl # encoding: [0xc3]
1212 ; X64-LABEL: test_mask_load_aligned_ps_256:
1214 ; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
1215 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1216 ; X64-NEXT: vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
1217 ; X64-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
1218 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1219 ; X64-NEXT: retq # encoding: [0xc3]
1220 %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
1221 %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
1222 %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
1223 %res4 = fadd <8 x float> %res2, %res1
1224 ret <8 x float> %res4
}
1227 declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
1229 define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
1230 ; X86-LABEL: test_mask_load_unaligned_ps_256:
1232 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1233 ; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
1234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1235 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1236 ; X86-NEXT: vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00]
1237 ; X86-NEXT: vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08]
1238 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1239 ; X86-NEXT: retl # encoding: [0xc3]
1241 ; X64-LABEL: test_mask_load_unaligned_ps_256:
1243 ; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
1244 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1245 ; X64-NEXT: vmovups (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
1246 ; X64-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
1247 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
1248 ; X64-NEXT: retq # encoding: [0xc3]
1249 %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
1250 %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
1251 %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
1252 %res4 = fadd <8 x float> %res2, %res1
1253 ret <8 x float> %res4
}
1256 declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
1258 define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
1259 ; X86-LABEL: test_mask_load_aligned_pd_256:
1261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1262 ; X86-NEXT: vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00]
1263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1264 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1265 ; X86-NEXT: vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00]
1266 ; X86-NEXT: vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08]
1267 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1268 ; X86-NEXT: retl # encoding: [0xc3]
1270 ; X64-LABEL: test_mask_load_aligned_pd_256:
1272 ; X64-NEXT: vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07]
1273 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1274 ; X64-NEXT: vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
1275 ; X64-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
1276 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1277 ; X64-NEXT: retq # encoding: [0xc3]
1278 %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
1279 %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
1280 %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
1281 %res4 = fadd <4 x double> %res2, %res1
1282 ret <4 x double> %res4
}
1285 declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
1287 define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
1288 ; X86-LABEL: test_mask_load_unaligned_pd_256:
1290 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1291 ; X86-NEXT: vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00]
1292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1293 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1294 ; X86-NEXT: vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00]
1295 ; X86-NEXT: vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08]
1296 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1297 ; X86-NEXT: retl # encoding: [0xc3]
1299 ; X64-LABEL: test_mask_load_unaligned_pd_256:
1301 ; X64-NEXT: vmovupd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07]
1302 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1303 ; X64-NEXT: vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
1304 ; X64-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
1305 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
1306 ; X64-NEXT: retq # encoding: [0xc3]
1307 %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
1308 %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
1309 %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
1310 %res4 = fadd <4 x double> %res2, %res1
1311 ret <4 x double> %res4
}
1314 declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
1316 define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
1317 ; X86-LABEL: test_mask_load_aligned_ps_128:
1319 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1320 ; X86-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
1321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1322 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1323 ; X86-NEXT: vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00]
1324 ; X86-NEXT: vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08]
1325 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1326 ; X86-NEXT: retl # encoding: [0xc3]
1328 ; X64-LABEL: test_mask_load_aligned_ps_128:
1330 ; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
1331 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1332 ; X64-NEXT: vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
1333 ; X64-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
1334 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1335 ; X64-NEXT: retq # encoding: [0xc3]
1336 %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
1337 %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
1338 %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
1339 %res4 = fadd <4 x float> %res2, %res1
1340 ret <4 x float> %res4
}
1343 declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
1345 define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
1346 ; X86-LABEL: test_mask_load_unaligned_ps_128:
1348 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1349 ; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
1350 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1351 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1352 ; X86-NEXT: vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00]
1353 ; X86-NEXT: vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08]
1354 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1355 ; X86-NEXT: retl # encoding: [0xc3]
1357 ; X64-LABEL: test_mask_load_unaligned_ps_128:
1359 ; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
1360 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1361 ; X64-NEXT: vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
1362 ; X64-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
1363 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
1364 ; X64-NEXT: retq # encoding: [0xc3]
1365 %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
1366 %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
1367 %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
1368 %res4 = fadd <4 x float> %res2, %res1
1369 ret <4 x float> %res4
}
1372 declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
1374 define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
1375 ; X86-LABEL: test_mask_load_aligned_pd_128:
1377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1378 ; X86-NEXT: vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
1379 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1380 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1381 ; X86-NEXT: vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00]
1382 ; X86-NEXT: vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08]
1383 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1384 ; X86-NEXT: retl # encoding: [0xc3]
1386 ; X64-LABEL: test_mask_load_aligned_pd_128:
1388 ; X64-NEXT: vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
1389 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1390 ; X64-NEXT: vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
1391 ; X64-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
1392 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1393 ; X64-NEXT: retq # encoding: [0xc3]
1394 %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
1395 %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
1396 %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
1397 %res4 = fadd <2 x double> %res2, %res1
1398 ret <2 x double> %res4
}
1401 declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
1403 define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
1404 ; X86-LABEL: test_mask_load_unaligned_pd_128:
1406 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1407 ; X86-NEXT: vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00]
1408 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1409 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1410 ; X86-NEXT: vmovupd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x00]
1411 ; X86-NEXT: vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08]
1412 ; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1413 ; X86-NEXT: retl # encoding: [0xc3]
1415 ; X64-LABEL: test_mask_load_unaligned_pd_128:
1417 ; X64-NEXT: vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07]
1418 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1419 ; X64-NEXT: vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
1420 ; X64-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
1421 ; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
1422 ; X64-NEXT: retq # encoding: [0xc3]
1423 %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
1424 %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
1425 %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
1426 %res4 = fadd <2 x double> %res2, %res1
1427 ret <2 x double> %res4
}
1430 declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
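; The integer unaligned-load tests below (vmovdqu32/vmovdqu64) take two pointers,
; so the merge-masked load reads from %ptr2 while the unmasked and zero-masked
; loads read from %ptr.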
1432 declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)
1434 define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
1435 ; X86-LABEL: test_mask_load_unaligned_d_128:
1437 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1439 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
1440 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1441 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1442 ; X86-NEXT: vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00]
1443 ; X86-NEXT: vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09]
1444 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1445 ; X86-NEXT: retl # encoding: [0xc3]
1447 ; X64-LABEL: test_mask_load_unaligned_d_128:
1449 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
1450 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1451 ; X64-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
1452 ; X64-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
1453 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1454 ; X64-NEXT: retq # encoding: [0xc3]
1455 %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
1456 %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
1457 %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
1458 %res4 = add <4 x i32> %res2, %res1
ret <4 x i32> %res4
}
1462 declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)
1464 define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
1465 ; X86-LABEL: test_mask_load_unaligned_d_256:
1467 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1468 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1469 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
1470 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1471 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1472 ; X86-NEXT: vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00]
1473 ; X86-NEXT: vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09]
1474 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1475 ; X86-NEXT: retl # encoding: [0xc3]
1477 ; X64-LABEL: test_mask_load_unaligned_d_256:
1479 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
1480 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1481 ; X64-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
1482 ; X64-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
1483 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1484 ; X64-NEXT: retq # encoding: [0xc3]
1485 %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
1486 %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
1487 %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
1488 %res4 = add <8 x i32> %res2, %res1
ret <8 x i32> %res4
}
1492 declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)
1494 define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
1495 ; X86-LABEL: test_mask_load_unaligned_q_128:
1497 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1498 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1499 ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
1500 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1501 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1502 ; X86-NEXT: vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00]
1503 ; X86-NEXT: vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09]
1504 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1505 ; X86-NEXT: retl # encoding: [0xc3]
1507 ; X64-LABEL: test_mask_load_unaligned_q_128:
1509 ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
1510 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1511 ; X64-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
1512 ; X64-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
1513 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1514 ; X64-NEXT: retq # encoding: [0xc3]
1515 %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
1516 %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
1517 %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
1518 %res4 = add <2 x i64> %res2, %res1
ret <2 x i64> %res4
}
1522 declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)
1524 define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
1525 ; X86-LABEL: test_mask_load_unaligned_q_256:
1527 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
1528 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1529 ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
1530 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
1531 ; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1532 ; X86-NEXT: vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00]
1533 ; X86-NEXT: vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09]
1534 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1535 ; X86-NEXT: retl # encoding: [0xc3]
1537 ; X64-LABEL: test_mask_load_unaligned_q_256:
1539 ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
1540 ; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
1541 ; X64-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
1542 ; X64-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
1543 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1544 ; X64-NEXT: retq # encoding: [0xc3]
1545 %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
1546 %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
1547 %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
1548 %res4 = add <4 x i64> %res2, %res1
ret <4 x i64> %res4
}
1552 declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
1554 define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
1555 ; X86-LABEL: test_mask_load_aligned_d_128:
1557 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1558 ; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
1559 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1560 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1561 ; X86-NEXT: vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00]
1562 ; X86-NEXT: vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08]
1563 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1564 ; X86-NEXT: retl # encoding: [0xc3]
1566 ; X64-LABEL: test_mask_load_aligned_d_128:
1568 ; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
1569 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1570 ; X64-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
1571 ; X64-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
1572 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1573 ; X64-NEXT: retq # encoding: [0xc3]
1574 %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
1575 %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
1576 %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
1577 %res4 = add <4 x i32> %res2, %res1
ret <4 x i32> %res4
}
1581 declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)
1583 define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
1584 ; X86-LABEL: test_mask_load_aligned_d_256:
1586 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1587 ; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
1588 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1589 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1590 ; X86-NEXT: vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00]
1591 ; X86-NEXT: vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08]
1592 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1593 ; X86-NEXT: retl # encoding: [0xc3]
1595 ; X64-LABEL: test_mask_load_aligned_d_256:
1597 ; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
1598 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1599 ; X64-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
1600 ; X64-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
1601 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1602 ; X64-NEXT: retq # encoding: [0xc3]
1603 %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
1604 %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
1605 %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
1606 %res4 = add <8 x i32> %res2, %res1
ret <8 x i32> %res4
}
1610 declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)
1612 define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
1613 ; X86-LABEL: test_mask_load_aligned_q_128:
1615 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1616 ; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
1617 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1618 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1619 ; X86-NEXT: vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00]
1620 ; X86-NEXT: vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08]
1621 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1622 ; X86-NEXT: retl # encoding: [0xc3]
1624 ; X64-LABEL: test_mask_load_aligned_q_128:
1626 ; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
1627 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1628 ; X64-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
1629 ; X64-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
1630 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
1631 ; X64-NEXT: retq # encoding: [0xc3]
1632 %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
1633 %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
1634 %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
1635 %res4 = add <2 x i64> %res2, %res1
ret <2 x i64> %res4
}
1639 declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)
1641 define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
1642 ; X86-LABEL: test_mask_load_aligned_q_256:
1644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1645 ; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
1646 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1647 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1648 ; X86-NEXT: vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00]
1649 ; X86-NEXT: vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08]
1650 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1651 ; X86-NEXT: retl # encoding: [0xc3]
1653 ; X64-LABEL: test_mask_load_aligned_q_256:
1655 ; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
1656 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1657 ; X64-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
1658 ; X64-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
1659 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
1660 ; X64-NEXT: retq # encoding: [0xc3]
1661 %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
1662 %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
1663 %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
1664 %res4 = add <4 x i64> %res2, %res1
ret <4 x i64> %res4
}
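; The pshufd tests apply the same shuffle immediate (3) in unmasked, merge-masked,
; and zero-masked forms; the unmasked copy is expected to be compressed from EVEX
; to VEX encoding, while the masked forms stay EVEX-encoded under {%k1}.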
1668 declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)
1670 define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
1671 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
1673 ; X86-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
1674 ; X86-NEXT: # xmm2 = xmm0[3,0,0,0]
1675 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1676 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1677 ; X86-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
1678 ; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
1679 ; X86-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
1680 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
1681 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
1682 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1683 ; X86-NEXT: retl # encoding: [0xc3]
1685 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
1687 ; X64-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
1688 ; X64-NEXT: # xmm2 = xmm0[3,0,0,0]
1689 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1690 ; X64-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
1691 ; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
1692 ; X64-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
1693 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
1694 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
1695 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1696 ; X64-NEXT: retq # encoding: [0xc3]
1697 %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
1698 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
1699 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
1700 %res3 = add <4 x i32> %res, %res1
1701 %res4 = add <4 x i32> %res3, %res2
ret <4 x i32> %res4
}
1705 declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)
1707 define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
1708 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
1710 ; X86-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
1711 ; X86-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
1712 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1713 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1714 ; X86-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
1715 ; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
1716 ; X86-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
1717 ; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
1718 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
1719 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1720 ; X86-NEXT: retl # encoding: [0xc3]
1722 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
1724 ; X64-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
1725 ; X64-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
1726 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1727 ; X64-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
1728 ; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
1729 ; X64-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
1730 ; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
1731 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
1732 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
1733 ; X64-NEXT: retq # encoding: [0xc3]
1734 %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
1735 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
1736 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
1737 %res3 = add <8 x i32> %res, %res1
1738 %res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
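; The compare tests below return the vpcmpeq*/vpcmpgt* result as an i8 mask. With an
; all-ones mask the compare runs unmasked; with a variable mask the result is either
; ANDed with the mask in a GPR (the 256-bit dword cases) or the compare itself runs
; under {%k1}.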
1742 define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
1743 ; CHECK-LABEL: test_pcmpeq_d_256:
1745 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1746 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1747 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1748 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1749 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1750 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
1754 define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1755 ; X86-LABEL: test_mask_pcmpeq_d_256:
1757 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1758 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1759 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1760 ; X86-NEXT: # kill: def $al killed $al killed $eax
1761 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1762 ; X86-NEXT: retl # encoding: [0xc3]
1764 ; X64-LABEL: test_mask_pcmpeq_d_256:
1766 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
1767 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1768 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1769 ; X64-NEXT: # kill: def $al killed $al killed $eax
1770 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1771 ; X64-NEXT: retq # encoding: [0xc3]
1772 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
1776 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
1778 define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
1779 ; CHECK-LABEL: test_pcmpeq_q_256:
1781 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
1782 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1783 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1784 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1785 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1786 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
1790 define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
1791 ; X86-LABEL: test_mask_pcmpeq_q_256:
1793 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1794 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1795 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
1796 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1797 ; X86-NEXT: # kill: def $al killed $al killed $eax
1798 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1799 ; X86-NEXT: retl # encoding: [0xc3]
1801 ; X64-LABEL: test_mask_pcmpeq_q_256:
1803 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1804 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
1805 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1806 ; X64-NEXT: # kill: def $al killed $al killed $eax
1807 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1808 ; X64-NEXT: retq # encoding: [0xc3]
1809 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
1813 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
1815 define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
1816 ; CHECK-LABEL: test_pcmpgt_d_256:
1818 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1819 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1820 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1821 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1823 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
1827 define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
1828 ; X86-LABEL: test_mask_pcmpgt_d_256:
1830 ; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1831 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1832 ; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
1833 ; X86-NEXT: # kill: def $al killed $al killed $eax
1834 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1835 ; X86-NEXT: retl # encoding: [0xc3]
1837 ; X64-LABEL: test_mask_pcmpgt_d_256:
1839 ; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
1840 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1841 ; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
1842 ; X64-NEXT: # kill: def $al killed $al killed $eax
1843 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1844 ; X64-NEXT: retq # encoding: [0xc3]
1845 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
1849 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
1851 define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
1852 ; CHECK-LABEL: test_pcmpgt_q_256:
1854 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
1855 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1856 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1857 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1858 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1859 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
1863 define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
1864 ; X86-LABEL: test_mask_pcmpgt_q_256:
1866 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1867 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1868 ; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
1869 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1870 ; X86-NEXT: # kill: def $al killed $al killed $eax
1871 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1872 ; X86-NEXT: retl # encoding: [0xc3]
1874 ; X64-LABEL: test_mask_pcmpgt_q_256:
1876 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1877 ; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
1878 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1879 ; X64-NEXT: # kill: def $al killed $al killed $eax
1880 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1881 ; X64-NEXT: retq # encoding: [0xc3]
1882 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
1886 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
1888 define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
1889 ; CHECK-LABEL: test_pcmpeq_d_128:
1891 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
1892 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1893 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1894 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1895 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
1899 define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1900 ; X86-LABEL: test_mask_pcmpeq_d_128:
1902 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1903 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1904 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
1905 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1906 ; X86-NEXT: # kill: def $al killed $al killed $eax
1907 ; X86-NEXT: retl # encoding: [0xc3]
1909 ; X64-LABEL: test_mask_pcmpeq_d_128:
1911 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1912 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
1913 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1914 ; X64-NEXT: # kill: def $al killed $al killed $eax
1915 ; X64-NEXT: retq # encoding: [0xc3]
1916 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
1920 declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
1922 define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
1923 ; CHECK-LABEL: test_pcmpeq_q_128:
1925 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
1926 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1927 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1928 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1929 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}
1933 define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
1934 ; X86-LABEL: test_mask_pcmpeq_q_128:
1936 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1937 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1938 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
1939 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1940 ; X86-NEXT: # kill: def $al killed $al killed $eax
1941 ; X86-NEXT: retl # encoding: [0xc3]
1943 ; X64-LABEL: test_mask_pcmpeq_q_128:
1945 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1946 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
1947 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1948 ; X64-NEXT: # kill: def $al killed $al killed $eax
1949 ; X64-NEXT: retq # encoding: [0xc3]
1950 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}
1954 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
1956 define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
1957 ; CHECK-LABEL: test_pcmpgt_d_128:
1959 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
1960 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1961 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1962 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1963 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}
1967 define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1968 ; X86-LABEL: test_mask_pcmpgt_d_128:
1970 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1971 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1972 ; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
1973 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1974 ; X86-NEXT: # kill: def $al killed $al killed $eax
1975 ; X86-NEXT: retl # encoding: [0xc3]
1977 ; X64-LABEL: test_mask_pcmpgt_d_128:
1979 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1980 ; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
1981 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1982 ; X64-NEXT: # kill: def $al killed $al killed $eax
1983 ; X64-NEXT: retq # encoding: [0xc3]
1984 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}
1988 declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
1990 define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
1991 ; CHECK-LABEL: test_pcmpgt_q_128:
1993 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
1994 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
1995 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
1996 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1997 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}
2001 define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
2002 ; X86-LABEL: test_mask_pcmpgt_q_128:
2004 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2005 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2006 ; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
2007 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2008 ; X86-NEXT: # kill: def $al killed $al killed $eax
2009 ; X86-NEXT: retl # encoding: [0xc3]
2011 ; X64-LABEL: test_mask_pcmpgt_q_128:
2013 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2014 ; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
2015 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
2016 ; X64-NEXT: # kill: def $al killed $al killed $eax
2017 ; X64-NEXT: retq # encoding: [0xc3]
2018 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}
2022 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
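; The tests below exercise the legacy masked floating-point unpack intrinsics
; (llvm.x86.avx512.mask.unpckh.* / llvm.x86.avx512.mask.unpckl.*). Each test
; calls the intrinsic twice, once with the real mask %x3 (merge-masked into
; %x2 under {%k1}) and once with an all-ones mask, then adds the two results
; so a single return value covers both the masked and the unmasked encodings.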
2024 declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2026 define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2027 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
2029 ; X86-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
2030 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[1]
2031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2032 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2033 ; X86-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
2034 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2035 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2036 ; X86-NEXT: retl # encoding: [0xc3]
2038 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
2040 ; X64-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
2041 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[1]
2042 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2043 ; X64-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
2044 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2045 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2046 ; X64-NEXT: retq # encoding: [0xc3]
2047 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2048 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2049 %res2 = fadd <2 x double> %res, %res1
2050 ret <2 x double> %res2
}
2053 declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2055 define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2056 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
2058 ; X86-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
2059 ; X86-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2060 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2061 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2062 ; X86-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
2063 ; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2064 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2065 ; X86-NEXT: retl # encoding: [0xc3]
2067 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
2069 ; X64-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
2070 ; X64-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2071 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2072 ; X64-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
2073 ; X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2074 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2075 ; X64-NEXT: retq # encoding: [0xc3]
2076 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2077 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2078 %res2 = fadd <4 x double> %res, %res1
2079 ret <4 x double> %res2
}
2082 declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2084 define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2085 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
2087 ; X86-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
2088 ; X86-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2090 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2091 ; X86-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
2092 ; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2093 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2094 ; X86-NEXT: retl # encoding: [0xc3]
2096 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
2098 ; X64-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
2099 ; X64-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2100 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2101 ; X64-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
2102 ; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2103 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2104 ; X64-NEXT: retq # encoding: [0xc3]
2105 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2106 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2107 %res2 = fadd <4 x float> %res, %res1
2108 ret <4 x float> %res2
}
2111 declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2113 define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
2114 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
2116 ; X86-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
2117 ; X86-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2118 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2119 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2120 ; X86-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
2121 ; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2122 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2123 ; X86-NEXT: retl # encoding: [0xc3]
2125 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
2127 ; X64-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
2128 ; X64-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2129 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2130 ; X64-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
2131 ; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2132 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2133 ; X64-NEXT: retq # encoding: [0xc3]
2134 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
2135 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
2136 %res2 = fadd <8 x float> %res, %res1
2137 ret <8 x float> %res2
}
2140 declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
2142 define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
2143 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
2145 ; X86-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
2146 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0]
2147 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2148 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2149 ; X86-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
2150 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2151 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2152 ; X86-NEXT: retl # encoding: [0xc3]
2154 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
2156 ; X64-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
2157 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0]
2158 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2159 ; X64-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
2160 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2161 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
2162 ; X64-NEXT: retq # encoding: [0xc3]
2163 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
2164 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
2165 %res2 = fadd <2 x double> %res, %res1
2166 ret <2 x double> %res2
}
2169 declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
2171 define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
2172 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
2174 ; X86-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
2175 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2176 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2177 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2178 ; X86-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
2179 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2180 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2181 ; X86-NEXT: retl # encoding: [0xc3]
2183 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
2185 ; X64-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
2186 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2187 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2188 ; X64-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
2189 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2190 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
2191 ; X64-NEXT: retq # encoding: [0xc3]
2192 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
2193 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
2194 %res2 = fadd <4 x double> %res, %res1
2195 ret <4 x double> %res2
}
2198 declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
2200 define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
2201 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
2203 ; X86-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
2204 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2205 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2206 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2207 ; X86-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
2208 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2209 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2210 ; X86-NEXT: retl # encoding: [0xc3]
2212 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
2214 ; X64-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
2215 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2216 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2217 ; X64-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
2218 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2219 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
2220 ; X64-NEXT: retq # encoding: [0xc3]
2221 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
2222 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
2223 %res2 = fadd <4 x float> %res, %res1
2224 ret <4 x float> %res2
}
2227 declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
2229 define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
2230 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
2232 ; X86-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
2233 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2235 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2236 ; X86-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
2237 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2238 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2239 ; X86-NEXT: retl # encoding: [0xc3]
2241 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
2243 ; X64-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
2244 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2245 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2246 ; X64-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
2247 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2248 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
2249 ; X64-NEXT: retq # encoding: [0xc3]
2250 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
2251 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
2252 %res2 = fadd <8 x float> %res, %res1
2253 ret <8 x float> %res2
}
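; The same masked/unmasked pattern repeated for the integer unpack intrinsics
; (vpunpckhdq/vpunpckldq and vpunpckhqdq/vpunpcklqdq), combining the two
; results with vpaddd/vpaddq instead of vaddps/vaddpd.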
2256 declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2258 define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2259 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
2261 ; X86-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
2262 ; X86-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2264 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2265 ; X86-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
2266 ; X86-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2267 ; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2268 ; X86-NEXT: retl # encoding: [0xc3]
2270 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
2272 ; X64-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
2273 ; X64-NEXT: # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2275 ; X64-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
2276 ; X64-NEXT: # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2277 ; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2278 ; X64-NEXT: retq # encoding: [0xc3]
2279 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2280 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2281 %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}
2285 declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2287 define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
2288 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
2290 ; X86-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
2291 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2293 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2294 ; X86-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
2295 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2296 ; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2297 ; X86-NEXT: retl # encoding: [0xc3]
2299 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
2301 ; X64-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
2302 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2303 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2304 ; X64-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
2305 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2306 ; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
2307 ; X64-NEXT: retq # encoding: [0xc3]
2308 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
2309 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
2310 %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}
2314 declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2316 define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2317 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
2319 ; X86-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
2320 ; X86-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2322 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2323 ; X86-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
2324 ; X86-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2325 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2326 ; X86-NEXT: retl # encoding: [0xc3]
2328 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
2330 ; X64-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
2331 ; X64-NEXT: # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2332 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2333 ; X64-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
2334 ; X64-NEXT: # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
2335 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2336 ; X64-NEXT: retq # encoding: [0xc3]
2337 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2338 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2339 %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
2343 declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
2345 define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
2346 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
2348 ; X86-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
2349 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2350 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2351 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2352 ; X86-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
2353 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2354 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2355 ; X86-NEXT: retl # encoding: [0xc3]
2357 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
2359 ; X64-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
2360 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2361 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2362 ; X64-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
2363 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
2364 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
2365 ; X64-NEXT: retq # encoding: [0xc3]
2366 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
2367 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
2368 %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
2372 declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2374 define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2375 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
2377 ; X86-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
2378 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[1]
2379 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2380 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2381 ; X86-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
2382 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2383 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2384 ; X86-NEXT: retl # encoding: [0xc3]
2386 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
2388 ; X64-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
2389 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[1]
2390 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2391 ; X64-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
2392 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[1]
2393 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2394 ; X64-NEXT: retq # encoding: [0xc3]
2395 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2396 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2397 %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
2401 declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
2403 define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
2404 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
2406 ; X86-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
2407 ; X86-NEXT: # xmm3 = xmm0[0],xmm1[0]
2408 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2409 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2410 ; X86-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
2411 ; X86-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2412 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2413 ; X86-NEXT: retl # encoding: [0xc3]
2415 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
2417 ; X64-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
2418 ; X64-NEXT: # xmm3 = xmm0[0],xmm1[0]
2419 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2420 ; X64-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
2421 ; X64-NEXT: # xmm2 {%k1} = xmm0[0],xmm1[0]
2422 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
2423 ; X64-NEXT: retq # encoding: [0xc3]
2424 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
2425 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
2426 %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
2430 declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2432 define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
2433 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
2435 ; X86-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
2436 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2437 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2438 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2439 ; X86-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
2440 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2441 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2442 ; X86-NEXT: retl # encoding: [0xc3]
2444 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
2446 ; X64-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
2447 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2448 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2449 ; X64-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
2450 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
2451 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2452 ; X64-NEXT: retq # encoding: [0xc3]
2453 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
2454 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
2455 %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
2459 declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
2461 define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
2462 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
2464 ; X86-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
2465 ; X86-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2466 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2467 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2468 ; X86-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
2469 ; X86-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2470 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2471 ; X86-NEXT: retl # encoding: [0xc3]
2473 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
2475 ; X64-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
2476 ; X64-NEXT: # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2477 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2478 ; X64-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
2479 ; X64-NEXT: # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2480 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
2481 ; X64-NEXT: retq # encoding: [0xc3]
2482 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
2483 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
2484 %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
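; Masked logical-AND tests. The suffix encodes the operand form under test:
; rr = register/register, rm = register/memory, rmb = register with a scalar
; broadcast from memory ({1to4}/{1to8}); a trailing k merge-masks into
; %passThru and kz zero-masks ({z}). As a rough hand-written sketch (not part
; of this autogenerated file), the merge-masked 128-bit AND corresponds to IR
; along these lines, with the low four bits of the i8 mask selecting lanes:
;   %and  = and <4 x i32> %a, %b
;   %bits = bitcast i8 %mask to <8 x i1>
;   %m    = shufflevector <8 x i1> %bits, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res  = select <4 x i1> %m, <4 x i32> %and, <4 x i32> %passThru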
2488 define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2489 ; CHECK-LABEL: test_mask_and_epi32_rr_128:
2491 ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
2492 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2493 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2497 define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
2498 ; X86-LABEL: test_mask_and_epi32_rrk_128:
2500 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2501 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2502 ; X86-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
2503 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2504 ; X86-NEXT: retl # encoding: [0xc3]
2506 ; X64-LABEL: test_mask_and_epi32_rrk_128:
2508 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2509 ; X64-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
2510 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2511 ; X64-NEXT: retq # encoding: [0xc3]
2512 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2516 define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2517 ; X86-LABEL: test_mask_and_epi32_rrkz_128:
2519 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2520 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2521 ; X86-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
2522 ; X86-NEXT: retl # encoding: [0xc3]
2524 ; X64-LABEL: test_mask_and_epi32_rrkz_128:
2526 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2527 ; X64-NEXT: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
2528 ; X64-NEXT: retq # encoding: [0xc3]
2529 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2533 define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2534 ; X86-LABEL: test_mask_and_epi32_rm_128:
2536 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2537 ; X86-NEXT: vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
2538 ; X86-NEXT: retl # encoding: [0xc3]
2540 ; X64-LABEL: test_mask_and_epi32_rm_128:
2542 ; X64-NEXT: vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
2543 ; X64-NEXT: retq # encoding: [0xc3]
2544 %b = load <4 x i32>, <4 x i32>* %ptr_b
2545 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2549 define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2550 ; X86-LABEL: test_mask_and_epi32_rmk_128:
2552 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2553 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2554 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2555 ; X86-NEXT: vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08]
2556 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2557 ; X86-NEXT: retl # encoding: [0xc3]
2559 ; X64-LABEL: test_mask_and_epi32_rmk_128:
2561 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2562 ; X64-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
2563 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2564 ; X64-NEXT: retq # encoding: [0xc3]
2565 %b = load <4 x i32>, <4 x i32>* %ptr_b
2566 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2570 define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2571 ; X86-LABEL: test_mask_and_epi32_rmkz_128:
2573 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2574 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2575 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2576 ; X86-NEXT: vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00]
2577 ; X86-NEXT: retl # encoding: [0xc3]
2579 ; X64-LABEL: test_mask_and_epi32_rmkz_128:
2581 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2582 ; X64-NEXT: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
2583 ; X64-NEXT: retq # encoding: [0xc3]
2584 %b = load <4 x i32>, <4 x i32>* %ptr_b
2585 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2589 define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2590 ; X86-LABEL: test_mask_and_epi32_rmb_128:
2592 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2593 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00]
2594 ; X86-NEXT: retl # encoding: [0xc3]
2596 ; X64-LABEL: test_mask_and_epi32_rmb_128:
2598 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
2599 ; X64-NEXT: retq # encoding: [0xc3]
2600 %q = load i32, i32* %ptr_b
2601 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2602 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2603 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2607 define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2608 ; X86-LABEL: test_mask_and_epi32_rmbk_128:
2610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2611 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2612 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2613 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08]
2614 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2615 ; X86-NEXT: retl # encoding: [0xc3]
2617 ; X64-LABEL: test_mask_and_epi32_rmbk_128:
2619 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2620 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
2621 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2622 ; X64-NEXT: retq # encoding: [0xc3]
2623 %q = load i32, i32* %ptr_b
2624 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2625 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2626 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2630 define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2631 ; X86-LABEL: test_mask_and_epi32_rmbkz_128:
2633 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2634 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2635 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2636 ; X86-NEXT: vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00]
2637 ; X86-NEXT: retl # encoding: [0xc3]
2639 ; X64-LABEL: test_mask_and_epi32_rmbkz_128:
2641 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2642 ; X64-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
2643 ; X64-NEXT: retq # encoding: [0xc3]
2644 %q = load i32, i32* %ptr_b
2645 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2646 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2647 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2651 declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
2653 define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2654 ; CHECK-LABEL: test_mask_and_epi32_rr_256:
2656 ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
2657 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2658 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2662 define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
2663 ; X86-LABEL: test_mask_and_epi32_rrk_256:
2665 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2666 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2667 ; X86-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
2668 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2669 ; X86-NEXT: retl # encoding: [0xc3]
2671 ; X64-LABEL: test_mask_and_epi32_rrk_256:
2673 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2674 ; X64-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
2675 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2676 ; X64-NEXT: retq # encoding: [0xc3]
2677 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2681 define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
2682 ; X86-LABEL: test_mask_and_epi32_rrkz_256:
2684 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2685 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2686 ; X86-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
2687 ; X86-NEXT: retl # encoding: [0xc3]
2689 ; X64-LABEL: test_mask_and_epi32_rrkz_256:
2691 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2692 ; X64-NEXT: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
2693 ; X64-NEXT: retq # encoding: [0xc3]
2694 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2698 define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
2699 ; X86-LABEL: test_mask_and_epi32_rm_256:
2701 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2702 ; X86-NEXT: vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
2703 ; X86-NEXT: retl # encoding: [0xc3]
2705 ; X64-LABEL: test_mask_and_epi32_rm_256:
2707 ; X64-NEXT: vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
2708 ; X64-NEXT: retq # encoding: [0xc3]
2709 %b = load <8 x i32>, <8 x i32>* %ptr_b
2710 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2714 define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2715 ; X86-LABEL: test_mask_and_epi32_rmk_256:
2717 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2718 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2719 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2720 ; X86-NEXT: vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
2721 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2722 ; X86-NEXT: retl # encoding: [0xc3]
2724 ; X64-LABEL: test_mask_and_epi32_rmk_256:
2726 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2727 ; X64-NEXT: vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
2728 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2729 ; X64-NEXT: retq # encoding: [0xc3]
2730 %b = load <8 x i32>, <8 x i32>* %ptr_b
2731 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2735 define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
2736 ; X86-LABEL: test_mask_and_epi32_rmkz_256:
2738 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2739 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2740 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2741 ; X86-NEXT: vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
2742 ; X86-NEXT: retl # encoding: [0xc3]
2744 ; X64-LABEL: test_mask_and_epi32_rmkz_256:
2746 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2747 ; X64-NEXT: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
2748 ; X64-NEXT: retq # encoding: [0xc3]
2749 %b = load <8 x i32>, <8 x i32>* %ptr_b
2750 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2754 define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
2755 ; X86-LABEL: test_mask_and_epi32_rmb_256:
2757 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2758 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
2759 ; X86-NEXT: retl # encoding: [0xc3]
2761 ; X64-LABEL: test_mask_and_epi32_rmb_256:
2763 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
2764 ; X64-NEXT: retq # encoding: [0xc3]
2765 %q = load i32, i32* %ptr_b
2766 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2767 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2768 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}
2772 define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
2773 ; X86-LABEL: test_mask_and_epi32_rmbk_256:
2775 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2776 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2777 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2778 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
2779 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2780 ; X86-NEXT: retl # encoding: [0xc3]
2782 ; X64-LABEL: test_mask_and_epi32_rmbk_256:
2784 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2785 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
2786 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
2787 ; X64-NEXT: retq # encoding: [0xc3]
2788 %q = load i32, i32* %ptr_b
2789 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2790 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2791 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}
2795 define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
2796 ; X86-LABEL: test_mask_and_epi32_rmbkz_256:
2798 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2799 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2800 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2801 ; X86-NEXT: vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
2802 ; X86-NEXT: retl # encoding: [0xc3]
2804 ; X64-LABEL: test_mask_and_epi32_rmbkz_256:
2806 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2807 ; X64-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
2808 ; X64-NEXT: retq # encoding: [0xc3]
2809 %q = load i32, i32* %ptr_b
2810 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
2811 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
2812 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}
2816 declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
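; The same rr/rm/rmb (and k/kz) operand-form matrix repeated for the masked
; logical-OR intrinsic; with an all-ones mask the compiler is free to emit the
; plain VEX vorps form instead of the EVEX vpord encoding.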
2818 define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
2819 ; CHECK-LABEL: test_mask_or_epi32_rr_128:
2821 ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
2822 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2823 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2827 define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
2828 ; X86-LABEL: test_mask_or_epi32_rrk_128:
2830 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2831 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2832 ; X86-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
2833 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2834 ; X86-NEXT: retl # encoding: [0xc3]
2836 ; X64-LABEL: test_mask_or_epi32_rrk_128:
2838 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2839 ; X64-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
2840 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2841 ; X64-NEXT: retq # encoding: [0xc3]
2842 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2846 define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
2847 ; X86-LABEL: test_mask_or_epi32_rrkz_128:
2849 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2850 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2851 ; X86-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
2852 ; X86-NEXT: retl # encoding: [0xc3]
2854 ; X64-LABEL: test_mask_or_epi32_rrkz_128:
2856 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
2857 ; X64-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
2858 ; X64-NEXT: retq # encoding: [0xc3]
2859 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2863 define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
2864 ; X86-LABEL: test_mask_or_epi32_rm_128:
2866 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2867 ; X86-NEXT: vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
2868 ; X86-NEXT: retl # encoding: [0xc3]
2870 ; X64-LABEL: test_mask_or_epi32_rm_128:
2872 ; X64-NEXT: vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
2873 ; X64-NEXT: retq # encoding: [0xc3]
2874 %b = load <4 x i32>, <4 x i32>* %ptr_b
2875 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2879 define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2880 ; X86-LABEL: test_mask_or_epi32_rmk_128:
2882 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2883 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2884 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2885 ; X86-NEXT: vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
2886 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2887 ; X86-NEXT: retl # encoding: [0xc3]
2889 ; X64-LABEL: test_mask_or_epi32_rmk_128:
2891 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2892 ; X64-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
2893 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2894 ; X64-NEXT: retq # encoding: [0xc3]
2895 %b = load <4 x i32>, <4 x i32>* %ptr_b
2896 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}
2900 define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
2901 ; X86-LABEL: test_mask_or_epi32_rmkz_128:
2903 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2904 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2905 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2906 ; X86-NEXT: vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
2907 ; X86-NEXT: retl # encoding: [0xc3]
2909 ; X64-LABEL: test_mask_or_epi32_rmkz_128:
2911 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2912 ; X64-NEXT: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
2913 ; X64-NEXT: retq # encoding: [0xc3]
2914 %b = load <4 x i32>, <4 x i32>* %ptr_b
2915 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}
2919 define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
2920 ; X86-LABEL: test_mask_or_epi32_rmb_128:
2922 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2923 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
2924 ; X86-NEXT: retl # encoding: [0xc3]
2926 ; X64-LABEL: test_mask_or_epi32_rmb_128:
2928 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
2929 ; X64-NEXT: retq # encoding: [0xc3]
2930 %q = load i32, i32* %ptr_b
2931 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2932 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2933 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
2937 define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
2938 ; X86-LABEL: test_mask_or_epi32_rmbk_128:
2940 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2941 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2942 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2943 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
2944 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2945 ; X86-NEXT: retl # encoding: [0xc3]
2947 ; X64-LABEL: test_mask_or_epi32_rmbk_128:
2949 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2950 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
2951 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
2952 ; X64-NEXT: retq # encoding: [0xc3]
2953 %q = load i32, i32* %ptr_b
2954 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2955 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2956 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
2960 define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
2961 ; X86-LABEL: test_mask_or_epi32_rmbkz_128:
2963 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2964 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
2965 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
2966 ; X86-NEXT: vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
2967 ; X86-NEXT: retl # encoding: [0xc3]
2969 ; X64-LABEL: test_mask_or_epi32_rmbkz_128:
2971 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
2972 ; X64-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
2973 ; X64-NEXT: retq # encoding: [0xc3]
2974 %q = load i32, i32* %ptr_b
2975 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
2976 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
2977 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
2981 declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
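; Masked OR of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.por.d.256): register, memory, and {1to8} broadcast operands, each tested unmasked, merge-masked, and zero-masked.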
2983 define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
2984 ; CHECK-LABEL: test_mask_or_epi32_rr_256:
2986 ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
2987 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2988 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
2992 define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
2993 ; X86-LABEL: test_mask_or_epi32_rrk_256:
2995 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2996 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
2997 ; X86-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
2998 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2999 ; X86-NEXT: retl # encoding: [0xc3]
3001 ; X64-LABEL: test_mask_or_epi32_rrk_256:
3003 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3004 ; X64-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
3005 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3006 ; X64-NEXT: retq # encoding: [0xc3]
3007 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3011 define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3012 ; X86-LABEL: test_mask_or_epi32_rrkz_256:
3014 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3015 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3016 ; X86-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
3017 ; X86-NEXT: retl # encoding: [0xc3]
3019 ; X64-LABEL: test_mask_or_epi32_rrkz_256:
3021 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3022 ; X64-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
3023 ; X64-NEXT: retq # encoding: [0xc3]
3024 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3028 define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3029 ; X86-LABEL: test_mask_or_epi32_rm_256:
3031 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3032 ; X86-NEXT: vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
3033 ; X86-NEXT: retl # encoding: [0xc3]
3035 ; X64-LABEL: test_mask_or_epi32_rm_256:
3037 ; X64-NEXT: vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
3038 ; X64-NEXT: retq # encoding: [0xc3]
3039 %b = load <8 x i32>, <8 x i32>* %ptr_b
3040 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3044 define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3045 ; X86-LABEL: test_mask_or_epi32_rmk_256:
3047 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3048 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3049 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3050 ; X86-NEXT: vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08]
3051 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3052 ; X86-NEXT: retl # encoding: [0xc3]
3054 ; X64-LABEL: test_mask_or_epi32_rmk_256:
3056 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3057 ; X64-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
3058 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3059 ; X64-NEXT: retq # encoding: [0xc3]
3060 %b = load <8 x i32>, <8 x i32>* %ptr_b
3061 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3065 define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3066 ; X86-LABEL: test_mask_or_epi32_rmkz_256:
3068 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3069 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3070 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3071 ; X86-NEXT: vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00]
3072 ; X86-NEXT: retl # encoding: [0xc3]
3074 ; X64-LABEL: test_mask_or_epi32_rmkz_256:
3076 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3077 ; X64-NEXT: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
3078 ; X64-NEXT: retq # encoding: [0xc3]
3079 %b = load <8 x i32>, <8 x i32>* %ptr_b
3080 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3084 define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3085 ; X86-LABEL: test_mask_or_epi32_rmb_256:
3087 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3088 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00]
3089 ; X86-NEXT: retl # encoding: [0xc3]
3091 ; X64-LABEL: test_mask_or_epi32_rmb_256:
3093 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
3094 ; X64-NEXT: retq # encoding: [0xc3]
3095 %q = load i32, i32* %ptr_b
3096 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3097 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3098 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3102 define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3103 ; X86-LABEL: test_mask_or_epi32_rmbk_256:
3105 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3107 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3108 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
3109 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3110 ; X86-NEXT: retl # encoding: [0xc3]
3112 ; X64-LABEL: test_mask_or_epi32_rmbk_256:
3114 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3115 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
3116 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3117 ; X64-NEXT: retq # encoding: [0xc3]
3118 %q = load i32, i32* %ptr_b
3119 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3120 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3121 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3125 define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3126 ; X86-LABEL: test_mask_or_epi32_rmbkz_256:
3128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3129 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3130 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3131 ; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
3132 ; X86-NEXT: retl # encoding: [0xc3]
3134 ; X64-LABEL: test_mask_or_epi32_rmbkz_256:
3136 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3137 ; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
3138 ; X64-NEXT: retq # encoding: [0xc3]
3139 %q = load i32, i32* %ptr_b
3140 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3141 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3142 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3146 declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
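; Masked XOR of packed 32-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pxor.d.128): register, memory, and {1to4} broadcast operands, each tested unmasked, merge-masked, and zero-masked.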
3148 define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3149 ; CHECK-LABEL: test_mask_xor_epi32_rr_128:
3151 ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
3152 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3153 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3157 define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
3158 ; X86-LABEL: test_mask_xor_epi32_rrk_128:
3160 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3161 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3162 ; X86-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
3163 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3164 ; X86-NEXT: retl # encoding: [0xc3]
3166 ; X64-LABEL: test_mask_xor_epi32_rrk_128:
3168 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3169 ; X64-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
3170 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3171 ; X64-NEXT: retq # encoding: [0xc3]
3172 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3176 define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3177 ; X86-LABEL: test_mask_xor_epi32_rrkz_128:
3179 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3180 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3181 ; X86-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
3182 ; X86-NEXT: retl # encoding: [0xc3]
3184 ; X64-LABEL: test_mask_xor_epi32_rrkz_128:
3186 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3187 ; X64-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
3188 ; X64-NEXT: retq # encoding: [0xc3]
3189 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3193 define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
3194 ; X86-LABEL: test_mask_xor_epi32_rm_128:
3196 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3197 ; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
3198 ; X86-NEXT: retl # encoding: [0xc3]
3200 ; X64-LABEL: test_mask_xor_epi32_rm_128:
3202 ; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
3203 ; X64-NEXT: retq # encoding: [0xc3]
3204 %b = load <4 x i32>, <4 x i32>* %ptr_b
3205 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3209 define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3210 ; X86-LABEL: test_mask_xor_epi32_rmk_128:
3212 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3214 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3215 ; X86-NEXT: vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08]
3216 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3217 ; X86-NEXT: retl # encoding: [0xc3]
3219 ; X64-LABEL: test_mask_xor_epi32_rmk_128:
3221 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3222 ; X64-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
3223 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3224 ; X64-NEXT: retq # encoding: [0xc3]
3225 %b = load <4 x i32>, <4 x i32>* %ptr_b
3226 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3230 define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
3231 ; X86-LABEL: test_mask_xor_epi32_rmkz_128:
3233 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3234 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3235 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3236 ; X86-NEXT: vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x00]
3237 ; X86-NEXT: retl # encoding: [0xc3]
3239 ; X64-LABEL: test_mask_xor_epi32_rmkz_128:
3241 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3242 ; X64-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
3243 ; X64-NEXT: retq # encoding: [0xc3]
3244 %b = load <4 x i32>, <4 x i32>* %ptr_b
3245 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3249 define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
3250 ; X86-LABEL: test_mask_xor_epi32_rmb_128:
3252 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3253 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00]
3254 ; X86-NEXT: retl # encoding: [0xc3]
3256 ; X64-LABEL: test_mask_xor_epi32_rmb_128:
3258 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
3259 ; X64-NEXT: retq # encoding: [0xc3]
3260 %q = load i32, i32* %ptr_b
3261 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3262 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3263 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3267 define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3268 ; X86-LABEL: test_mask_xor_epi32_rmbk_128:
3270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3271 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3272 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3273 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08]
3274 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3275 ; X86-NEXT: retl # encoding: [0xc3]
3277 ; X64-LABEL: test_mask_xor_epi32_rmbk_128:
3279 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3280 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
3281 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3282 ; X64-NEXT: retq # encoding: [0xc3]
3283 %q = load i32, i32* %ptr_b
3284 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3285 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3286 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3290 define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
3291 ; X86-LABEL: test_mask_xor_epi32_rmbkz_128:
3293 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3295 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3296 ; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00]
3297 ; X86-NEXT: retl # encoding: [0xc3]
3299 ; X64-LABEL: test_mask_xor_epi32_rmbkz_128:
3301 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3302 ; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
3303 ; X64-NEXT: retq # encoding: [0xc3]
3304 %q = load i32, i32* %ptr_b
3305 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3306 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3307 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3311 declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
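; Masked XOR of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pxor.d.256).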
3313 define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3314 ; CHECK-LABEL: test_mask_xor_epi32_rr_256:
3316 ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
3317 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3318 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3322 define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
3323 ; X86-LABEL: test_mask_xor_epi32_rrk_256:
3325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3326 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3327 ; X86-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
3328 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3329 ; X86-NEXT: retl # encoding: [0xc3]
3331 ; X64-LABEL: test_mask_xor_epi32_rrk_256:
3333 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3334 ; X64-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
3335 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3336 ; X64-NEXT: retq # encoding: [0xc3]
3337 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3341 define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3342 ; X86-LABEL: test_mask_xor_epi32_rrkz_256:
3344 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3345 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3346 ; X86-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
3347 ; X86-NEXT: retl # encoding: [0xc3]
3349 ; X64-LABEL: test_mask_xor_epi32_rrkz_256:
3351 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3352 ; X64-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
3353 ; X64-NEXT: retq # encoding: [0xc3]
3354 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3358 define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3359 ; X86-LABEL: test_mask_xor_epi32_rm_256:
3361 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3362 ; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
3363 ; X86-NEXT: retl # encoding: [0xc3]
3365 ; X64-LABEL: test_mask_xor_epi32_rm_256:
3367 ; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
3368 ; X64-NEXT: retq # encoding: [0xc3]
3369 %b = load <8 x i32>, <8 x i32>* %ptr_b
3370 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3374 define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3375 ; X86-LABEL: test_mask_xor_epi32_rmk_256:
3377 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3378 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3379 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3380 ; X86-NEXT: vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08]
3381 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3382 ; X86-NEXT: retl # encoding: [0xc3]
3384 ; X64-LABEL: test_mask_xor_epi32_rmk_256:
3386 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3387 ; X64-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
3388 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3389 ; X64-NEXT: retq # encoding: [0xc3]
3390 %b = load <8 x i32>, <8 x i32>* %ptr_b
3391 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3395 define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3396 ; X86-LABEL: test_mask_xor_epi32_rmkz_256:
3398 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3399 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3400 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3401 ; X86-NEXT: vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00]
3402 ; X86-NEXT: retl # encoding: [0xc3]
3404 ; X64-LABEL: test_mask_xor_epi32_rmkz_256:
3406 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3407 ; X64-NEXT: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
3408 ; X64-NEXT: retq # encoding: [0xc3]
3409 %b = load <8 x i32>, <8 x i32>* %ptr_b
3410 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3414 define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3415 ; X86-LABEL: test_mask_xor_epi32_rmb_256:
3417 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3418 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00]
3419 ; X86-NEXT: retl # encoding: [0xc3]
3421 ; X64-LABEL: test_mask_xor_epi32_rmb_256:
3423 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
3424 ; X64-NEXT: retq # encoding: [0xc3]
3425 %q = load i32, i32* %ptr_b
3426 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3427 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3428 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3432 define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3433 ; X86-LABEL: test_mask_xor_epi32_rmbk_256:
3435 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3436 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3437 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3438 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08]
3439 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3440 ; X86-NEXT: retl # encoding: [0xc3]
3442 ; X64-LABEL: test_mask_xor_epi32_rmbk_256:
3444 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3445 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
3446 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3447 ; X64-NEXT: retq # encoding: [0xc3]
3448 %q = load i32, i32* %ptr_b
3449 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3450 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3451 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3455 define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3456 ; X86-LABEL: test_mask_xor_epi32_rmbkz_256:
3458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3459 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3460 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3461 ; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00]
3462 ; X86-NEXT: retl # encoding: [0xc3]
3464 ; X64-LABEL: test_mask_xor_epi32_rmbkz_256:
3466 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3467 ; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
3468 ; X64-NEXT: retq # encoding: [0xc3]
3469 %q = load i32, i32* %ptr_b
3470 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3471 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3472 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3476 declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
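; Masked ANDNOT of packed 32-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pandn.d.128): unmasked forms lower to VEX vandnps, masked forms use EVEX vpandnd.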
3478 define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3479 ; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
3481 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3482 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3483 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3487 define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
3488 ; X86-LABEL: test_mask_andnot_epi32_rrk_128:
3490 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3491 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3492 ; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3493 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3494 ; X86-NEXT: retl # encoding: [0xc3]
3496 ; X64-LABEL: test_mask_andnot_epi32_rrk_128:
3498 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3499 ; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3500 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3501 ; X64-NEXT: retq # encoding: [0xc3]
3502 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3506 define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
3507 ; X86-LABEL: test_mask_andnot_epi32_rrkz_128:
3509 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3510 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3511 ; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
3512 ; X86-NEXT: retl # encoding: [0xc3]
3514 ; X64-LABEL: test_mask_andnot_epi32_rrkz_128:
3516 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3517 ; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
3518 ; X64-NEXT: retq # encoding: [0xc3]
3519 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3523 define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
3524 ; X86-LABEL: test_mask_andnot_epi32_rm_128:
3526 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3527 ; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
3528 ; X86-NEXT: retl # encoding: [0xc3]
3530 ; X64-LABEL: test_mask_andnot_epi32_rm_128:
3532 ; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
3533 ; X64-NEXT: retq # encoding: [0xc3]
3534 %b = load <4 x i32>, <4 x i32>* %ptr_b
3535 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3539 define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3540 ; X86-LABEL: test_mask_andnot_epi32_rmk_128:
3542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3543 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3544 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3545 ; X86-NEXT: vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08]
3546 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3547 ; X86-NEXT: retl # encoding: [0xc3]
3549 ; X64-LABEL: test_mask_andnot_epi32_rmk_128:
3551 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3552 ; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
3553 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3554 ; X64-NEXT: retq # encoding: [0xc3]
3555 %b = load <4 x i32>, <4 x i32>* %ptr_b
3556 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3560 define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
3561 ; X86-LABEL: test_mask_andnot_epi32_rmkz_128:
3563 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3564 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3565 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3566 ; X86-NEXT: vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00]
3567 ; X86-NEXT: retl # encoding: [0xc3]
3569 ; X64-LABEL: test_mask_andnot_epi32_rmkz_128:
3571 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3572 ; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
3573 ; X64-NEXT: retq # encoding: [0xc3]
3574 %b = load <4 x i32>, <4 x i32>* %ptr_b
3575 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3579 define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
3580 ; X86-LABEL: test_mask_andnot_epi32_rmb_128:
3582 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3583 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00]
3584 ; X86-NEXT: retl # encoding: [0xc3]
3586 ; X64-LABEL: test_mask_andnot_epi32_rmb_128:
3588 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
3589 ; X64-NEXT: retq # encoding: [0xc3]
3590 %q = load i32, i32* %ptr_b
3591 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3592 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3593 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3597 define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
3598 ; X86-LABEL: test_mask_andnot_epi32_rmbk_128:
3600 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3601 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3602 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3603 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08]
3604 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3605 ; X86-NEXT: retl # encoding: [0xc3]
3607 ; X64-LABEL: test_mask_andnot_epi32_rmbk_128:
3609 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3610 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
3611 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3612 ; X64-NEXT: retq # encoding: [0xc3]
3613 %q = load i32, i32* %ptr_b
3614 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3615 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3616 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3620 define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
3621 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_128:
3623 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3624 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3625 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3626 ; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00]
3627 ; X86-NEXT: retl # encoding: [0xc3]
3629 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_128:
3631 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3632 ; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
3633 ; X64-NEXT: retq # encoding: [0xc3]
3634 %q = load i32, i32* %ptr_b
3635 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
3636 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
3637 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
3641 declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
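; Masked ANDNOT of packed 32-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pandn.d.256).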
3643 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
3644 ; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
3646 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
3647 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3648 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3652 define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
3653 ; X86-LABEL: test_mask_andnot_epi32_rrk_256:
3655 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3656 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3657 ; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
3658 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3659 ; X86-NEXT: retl # encoding: [0xc3]
3661 ; X64-LABEL: test_mask_andnot_epi32_rrk_256:
3663 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3664 ; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
3665 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3666 ; X64-NEXT: retq # encoding: [0xc3]
3667 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3671 define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
3672 ; X86-LABEL: test_mask_andnot_epi32_rrkz_256:
3674 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3675 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3676 ; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
3677 ; X86-NEXT: retl # encoding: [0xc3]
3679 ; X64-LABEL: test_mask_andnot_epi32_rrkz_256:
3681 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3682 ; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
3683 ; X64-NEXT: retq # encoding: [0xc3]
3684 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3688 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
3689 ; X86-LABEL: test_mask_andnot_epi32_rm_256:
3691 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3692 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
3693 ; X86-NEXT: retl # encoding: [0xc3]
3695 ; X64-LABEL: test_mask_andnot_epi32_rm_256:
3697 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
3698 ; X64-NEXT: retq # encoding: [0xc3]
3699 %b = load <8 x i32>, <8 x i32>* %ptr_b
3700 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3704 define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3705 ; X86-LABEL: test_mask_andnot_epi32_rmk_256:
3707 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3709 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3710 ; X86-NEXT: vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08]
3711 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3712 ; X86-NEXT: retl # encoding: [0xc3]
3714 ; X64-LABEL: test_mask_andnot_epi32_rmk_256:
3716 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3717 ; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
3718 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3719 ; X64-NEXT: retq # encoding: [0xc3]
3720 %b = load <8 x i32>, <8 x i32>* %ptr_b
3721 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3725 define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
3726 ; X86-LABEL: test_mask_andnot_epi32_rmkz_256:
3728 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3729 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3730 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3731 ; X86-NEXT: vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00]
3732 ; X86-NEXT: retl # encoding: [0xc3]
3734 ; X64-LABEL: test_mask_andnot_epi32_rmkz_256:
3736 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3737 ; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
3738 ; X64-NEXT: retq # encoding: [0xc3]
3739 %b = load <8 x i32>, <8 x i32>* %ptr_b
3740 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3744 define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
3745 ; X86-LABEL: test_mask_andnot_epi32_rmb_256:
3747 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3748 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00]
3749 ; X86-NEXT: retl # encoding: [0xc3]
3751 ; X64-LABEL: test_mask_andnot_epi32_rmb_256:
3753 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
3754 ; X64-NEXT: retq # encoding: [0xc3]
3755 %q = load i32, i32* %ptr_b
3756 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3757 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3758 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
3762 define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
3763 ; X86-LABEL: test_mask_andnot_epi32_rmbk_256:
3765 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3766 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3767 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3768 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08]
3769 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3770 ; X86-NEXT: retl # encoding: [0xc3]
3772 ; X64-LABEL: test_mask_andnot_epi32_rmbk_256:
3774 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3775 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
3776 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
3777 ; X64-NEXT: retq # encoding: [0xc3]
3778 %q = load i32, i32* %ptr_b
3779 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3780 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3781 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
3785 define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
3786 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_256:
3788 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3789 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3790 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3791 ; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00]
3792 ; X86-NEXT: retl # encoding: [0xc3]
3794 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_256:
3796 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3797 ; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
3798 ; X64-NEXT: retq # encoding: [0xc3]
3799 %q = load i32, i32* %ptr_b
3800 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
3801 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
3802 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
3806 declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
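; Masked ANDNOT of packed 64-bit integers, 128-bit vectors (llvm.x86.avx512.mask.pandn.q.128); on X86 the 64-bit broadcast operand is materialized with vmovddup/vpbroadcastq instead of an embedded {1to2} broadcast, which is only used on X64.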
3808 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
3809 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
3811 ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3812 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3813 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3817 define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
3818 ; X86-LABEL: test_mask_andnot_epi64_rrk_128:
3820 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3821 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3822 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
3823 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3824 ; X86-NEXT: retl # encoding: [0xc3]
3826 ; X64-LABEL: test_mask_andnot_epi64_rrk_128:
3828 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3829 ; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
3830 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3831 ; X64-NEXT: retq # encoding: [0xc3]
3832 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3836 define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
3837 ; X86-LABEL: test_mask_andnot_epi64_rrkz_128:
3839 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3840 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3841 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3842 ; X86-NEXT: retl # encoding: [0xc3]
3844 ; X64-LABEL: test_mask_andnot_epi64_rrkz_128:
3846 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3847 ; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3848 ; X64-NEXT: retq # encoding: [0xc3]
3849 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3853 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
3854 ; X86-LABEL: test_mask_andnot_epi64_rm_128:
3856 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3857 ; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
3858 ; X86-NEXT: retl # encoding: [0xc3]
3860 ; X64-LABEL: test_mask_andnot_epi64_rm_128:
3862 ; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
3863 ; X64-NEXT: retq # encoding: [0xc3]
3864 %b = load <2 x i64>, <2 x i64>* %ptr_b
3865 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3869 define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
3870 ; X86-LABEL: test_mask_andnot_epi64_rmk_128:
3872 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3873 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3874 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3875 ; X86-NEXT: vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08]
3876 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3877 ; X86-NEXT: retl # encoding: [0xc3]
3879 ; X64-LABEL: test_mask_andnot_epi64_rmk_128:
3881 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3882 ; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
3883 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3884 ; X64-NEXT: retq # encoding: [0xc3]
3885 %b = load <2 x i64>, <2 x i64>* %ptr_b
3886 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3890 define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
3891 ; X86-LABEL: test_mask_andnot_epi64_rmkz_128:
3893 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3894 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
3895 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
3896 ; X86-NEXT: vpandnq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x00]
3897 ; X86-NEXT: retl # encoding: [0xc3]
3899 ; X64-LABEL: test_mask_andnot_epi64_rmkz_128:
3901 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3902 ; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
3903 ; X64-NEXT: retq # encoding: [0xc3]
3904 %b = load <2 x i64>, <2 x i64>* %ptr_b
3905 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3909 define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
3910 ; X86-LABEL: test_mask_andnot_epi64_rmb_128:
3912 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3913 ; X86-NEXT: vmovddup (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x08]
3914 ; X86-NEXT: # xmm1 = mem[0,0]
3915 ; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3916 ; X86-NEXT: retl # encoding: [0xc3]
3918 ; X64-LABEL: test_mask_andnot_epi64_rmb_128:
3920 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
3921 ; X64-NEXT: retq # encoding: [0xc3]
3922 %q = load i64, i64* %ptr_b
3923 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3924 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3925 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
3929 define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
3930 ; X86-LABEL: test_mask_andnot_epi64_rmbk_128:
3932 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3933 ; X86-NEXT: vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
3934 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3935 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3936 ; X86-NEXT: vpandnq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xca]
3937 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3938 ; X86-NEXT: retl # encoding: [0xc3]
3940 ; X64-LABEL: test_mask_andnot_epi64_rmbk_128:
3942 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3943 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
3944 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
3945 ; X64-NEXT: retq # encoding: [0xc3]
3946 %q = load i64, i64* %ptr_b
3947 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3948 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3949 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
3953 define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
3954 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_128:
3956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3957 ; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
3958 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
3959 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3960 ; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
3961 ; X86-NEXT: retl # encoding: [0xc3]
3963 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_128:
3965 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
3966 ; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
3967 ; X64-NEXT: retq # encoding: [0xc3]
3968 %q = load i64, i64* %ptr_b
3969 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
3970 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
3971 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
3975 declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
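; Masked ANDNOT of packed 64-bit integers, 256-bit vectors (llvm.x86.avx512.mask.pandn.q.256).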
3977 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
3978 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
3980 ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
3981 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3982 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
3986 define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
3987 ; X86-LABEL: test_mask_andnot_epi64_rrk_256:
3989 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3990 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3991 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
3992 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
3993 ; X86-NEXT: retl # encoding: [0xc3]
3995 ; X64-LABEL: test_mask_andnot_epi64_rrk_256:
3997 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3998 ; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
3999 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4000 ; X64-NEXT: retq # encoding: [0xc3]
4001 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4005 define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
4006 ; X86-LABEL: test_mask_andnot_epi64_rrkz_256:
4008 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4009 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4010 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4011 ; X86-NEXT: retl # encoding: [0xc3]
4013 ; X64-LABEL: test_mask_andnot_epi64_rrkz_256:
4015 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4016 ; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4017 ; X64-NEXT: retq # encoding: [0xc3]
4018 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4022 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
4023 ; X86-LABEL: test_mask_andnot_epi64_rm_256:
4025 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4026 ; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
4027 ; X86-NEXT: retl # encoding: [0xc3]
4029 ; X64-LABEL: test_mask_andnot_epi64_rm_256:
4031 ; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
4032 ; X64-NEXT: retq # encoding: [0xc3]
4033 %b = load <4 x i64>, <4 x i64>* %ptr_b
4034 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
4038 define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
4039 ; X86-LABEL: test_mask_andnot_epi64_rmk_256:
4041 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4042 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4043 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4044 ; X86-NEXT: vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08]
4045 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4046 ; X86-NEXT: retl # encoding: [0xc3]
4048 ; X64-LABEL: test_mask_andnot_epi64_rmk_256:
4050 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4051 ; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
4052 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4053 ; X64-NEXT: retq # encoding: [0xc3]
4054 %b = load <4 x i64>, <4 x i64>* %ptr_b
4055 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4059 define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
4060 ; X86-LABEL: test_mask_andnot_epi64_rmkz_256:
4062 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4063 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4064 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4065 ; X86-NEXT: vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00]
4066 ; X86-NEXT: retl # encoding: [0xc3]
4068 ; X64-LABEL: test_mask_andnot_epi64_rmkz_256:
4070 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4071 ; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
4072 ; X64-NEXT: retq # encoding: [0xc3]
4073 %b = load <4 x i64>, <4 x i64>* %ptr_b
4074 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4078 define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
4079 ; X86-LABEL: test_mask_andnot_epi64_rmb_256:
4081 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4082 ; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
4083 ; X86-NEXT: # xmm1 = mem[0],zero
4084 ; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
4085 ; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
4086 ; X86-NEXT: retl # encoding: [0xc3]
4088 ; X64-LABEL: test_mask_andnot_epi64_rmb_256:
4090 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
4091 ; X64-NEXT: retq # encoding: [0xc3]
4092 %q = load i64, i64* %ptr_b
4093 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4094 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4095 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
4096 ret <4 x i64> %res
4097 }
4099 define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
4100 ; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
4102 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4103 ; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
4104 ; X86-NEXT: # xmm2 = mem[0],zero
4105 ; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
4106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4107 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4108 ; X86-NEXT: vpandnq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xca]
4109 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4110 ; X86-NEXT: retl # encoding: [0xc3]
4112 ; X64-LABEL: test_mask_andnot_epi64_rmbk_256:
4114 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4115 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
4116 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4117 ; X64-NEXT: retq # encoding: [0xc3]
4118 %q = load i64, i64* %ptr_b
4119 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4120 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4121 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
4122 ret <4 x i64> %res
4123 }
4125 define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
4126 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
4128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4129 ; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
4130 ; X86-NEXT: # xmm1 = mem[0],zero
4131 ; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
4132 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
4133 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4134 ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
4135 ; X86-NEXT: retl # encoding: [0xc3]
4137 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_256:
4139 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4140 ; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
4141 ; X64-NEXT: retq # encoding: [0xc3]
4142 %q = load i64, i64* %ptr_b
4143 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
4144 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
4145 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
4146 ret <4 x i64> %res
4147 }
4149 declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
4151 define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4152 ; CHECK-LABEL: test_mask_add_epi32_rr_128:
4154 ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
4155 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4156 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4157 ret <4 x i32> %res
4158 }
4160 define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4161 ; X86-LABEL: test_mask_add_epi32_rrk_128:
4163 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4164 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4165 ; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4166 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4167 ; X86-NEXT: retl # encoding: [0xc3]
4169 ; X64-LABEL: test_mask_add_epi32_rrk_128:
4171 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4172 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4173 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4174 ; X64-NEXT: retq # encoding: [0xc3]
4175 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4176 ret <4 x i32> %res
4177 }
4179 define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4180 ; X86-LABEL: test_mask_add_epi32_rrkz_128:
4182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4183 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4184 ; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4185 ; X86-NEXT: retl # encoding: [0xc3]
4187 ; X64-LABEL: test_mask_add_epi32_rrkz_128:
4189 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4190 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4191 ; X64-NEXT: retq # encoding: [0xc3]
4192 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4193 ret <4 x i32> %res
4194 }
4196 define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4197 ; X86-LABEL: test_mask_add_epi32_rm_128:
4199 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4200 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00]
4201 ; X86-NEXT: retl # encoding: [0xc3]
4203 ; X64-LABEL: test_mask_add_epi32_rm_128:
4205 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
4206 ; X64-NEXT: retq # encoding: [0xc3]
4207 %b = load <4 x i32>, <4 x i32>* %ptr_b
4208 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4209 ret <4 x i32> %res
4210 }
4212 define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4213 ; X86-LABEL: test_mask_add_epi32_rmk_128:
4215 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4216 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4217 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4218 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08]
4219 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4220 ; X86-NEXT: retl # encoding: [0xc3]
4222 ; X64-LABEL: test_mask_add_epi32_rmk_128:
4224 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4225 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
4226 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4227 ; X64-NEXT: retq # encoding: [0xc3]
4228 %b = load <4 x i32>, <4 x i32>* %ptr_b
4229 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4230 ret <4 x i32> %res
4231 }
4233 define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4234 ; X86-LABEL: test_mask_add_epi32_rmkz_128:
4236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4237 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4238 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4239 ; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00]
4240 ; X86-NEXT: retl # encoding: [0xc3]
4242 ; X64-LABEL: test_mask_add_epi32_rmkz_128:
4244 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4245 ; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
4246 ; X64-NEXT: retq # encoding: [0xc3]
4247 %b = load <4 x i32>, <4 x i32>* %ptr_b
4248 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4249 ret <4 x i32> %res
4250 }
4252 define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4253 ; X86-LABEL: test_mask_add_epi32_rmb_128:
4255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4256 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00]
4257 ; X86-NEXT: retl # encoding: [0xc3]
4259 ; X64-LABEL: test_mask_add_epi32_rmb_128:
4261 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
4262 ; X64-NEXT: retq # encoding: [0xc3]
4263 %q = load i32, i32* %ptr_b
4264 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4265 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4266 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4267 ret <4 x i32> %res
4268 }
4270 define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4271 ; X86-LABEL: test_mask_add_epi32_rmbk_128:
4273 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4274 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4275 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4276 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08]
4277 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4278 ; X86-NEXT: retl # encoding: [0xc3]
4280 ; X64-LABEL: test_mask_add_epi32_rmbk_128:
4282 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4283 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
4284 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4285 ; X64-NEXT: retq # encoding: [0xc3]
4286 %q = load i32, i32* %ptr_b
4287 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4288 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4289 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4290 ret <4 x i32> %res
4291 }
4293 define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4294 ; X86-LABEL: test_mask_add_epi32_rmbkz_128:
4296 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4297 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4298 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4299 ; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00]
4300 ; X86-NEXT: retl # encoding: [0xc3]
4302 ; X64-LABEL: test_mask_add_epi32_rmbkz_128:
4304 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4305 ; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
4306 ; X64-NEXT: retq # encoding: [0xc3]
4307 %q = load i32, i32* %ptr_b
4308 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4309 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4310 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4311 ret <4 x i32> %res
4312 }
4314 declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4316 define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4317 ; CHECK-LABEL: test_mask_sub_epi32_rr_128:
4319 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
4320 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4321 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4322 ret <4 x i32> %res
4323 }
4325 define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4326 ; X86-LABEL: test_mask_sub_epi32_rrk_128:
4328 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4329 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4330 ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4331 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4332 ; X86-NEXT: retl # encoding: [0xc3]
4334 ; X64-LABEL: test_mask_sub_epi32_rrk_128:
4336 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4337 ; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4338 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4339 ; X64-NEXT: retq # encoding: [0xc3]
4340 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4341 ret <4 x i32> %res
4342 }
4344 define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4345 ; X86-LABEL: test_mask_sub_epi32_rrkz_128:
4347 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4348 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4349 ; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4350 ; X86-NEXT: retl # encoding: [0xc3]
4352 ; X64-LABEL: test_mask_sub_epi32_rrkz_128:
4354 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4355 ; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4356 ; X64-NEXT: retq # encoding: [0xc3]
4357 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4358 ret <4 x i32> %res
4359 }
4361 define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4362 ; X86-LABEL: test_mask_sub_epi32_rm_128:
4364 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4365 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00]
4366 ; X86-NEXT: retl # encoding: [0xc3]
4368 ; X64-LABEL: test_mask_sub_epi32_rm_128:
4370 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07]
4371 ; X64-NEXT: retq # encoding: [0xc3]
4372 %b = load <4 x i32>, <4 x i32>* %ptr_b
4373 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4374 ret <4 x i32> %res
4375 }
4377 define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4378 ; X86-LABEL: test_mask_sub_epi32_rmk_128:
4380 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4381 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4382 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4383 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08]
4384 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4385 ; X86-NEXT: retl # encoding: [0xc3]
4387 ; X64-LABEL: test_mask_sub_epi32_rmk_128:
4389 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4390 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
4391 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4392 ; X64-NEXT: retq # encoding: [0xc3]
4393 %b = load <4 x i32>, <4 x i32>* %ptr_b
4394 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4395 ret <4 x i32> %res
4396 }
4398 define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4399 ; X86-LABEL: test_mask_sub_epi32_rmkz_128:
4401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4402 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4403 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4404 ; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00]
4405 ; X86-NEXT: retl # encoding: [0xc3]
4407 ; X64-LABEL: test_mask_sub_epi32_rmkz_128:
4409 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4410 ; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
4411 ; X64-NEXT: retq # encoding: [0xc3]
4412 %b = load <4 x i32>, <4 x i32>* %ptr_b
4413 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4414 ret <4 x i32> %res
4415 }
4417 define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4418 ; X86-LABEL: test_mask_sub_epi32_rmb_128:
4420 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4421 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00]
4422 ; X86-NEXT: retl # encoding: [0xc3]
4424 ; X64-LABEL: test_mask_sub_epi32_rmb_128:
4426 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
4427 ; X64-NEXT: retq # encoding: [0xc3]
4428 %q = load i32, i32* %ptr_b
4429 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4430 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4431 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4432 ret <4 x i32> %res
4433 }
4435 define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4436 ; X86-LABEL: test_mask_sub_epi32_rmbk_128:
4438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4439 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4440 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4441 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08]
4442 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4443 ; X86-NEXT: retl # encoding: [0xc3]
4445 ; X64-LABEL: test_mask_sub_epi32_rmbk_128:
4447 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4448 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
4449 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4450 ; X64-NEXT: retq # encoding: [0xc3]
4451 %q = load i32, i32* %ptr_b
4452 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4453 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4454 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4455 ret <4 x i32> %res
4456 }
4458 define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4459 ; X86-LABEL: test_mask_sub_epi32_rmbkz_128:
4461 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4462 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4463 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4464 ; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00]
4465 ; X86-NEXT: retl # encoding: [0xc3]
4467 ; X64-LABEL: test_mask_sub_epi32_rmbkz_128:
4469 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4470 ; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
4471 ; X64-NEXT: retq # encoding: [0xc3]
4472 %q = load i32, i32* %ptr_b
4473 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4474 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4475 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4476 ret <4 x i32> %res
4477 }
4479 declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4481 define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4482 ; CHECK-LABEL: test_mask_sub_epi32_rr_256:
4484 ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
4485 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4486 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4487 ret <8 x i32> %res
4488 }
4490 define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
4491 ; X86-LABEL: test_mask_sub_epi32_rrk_256:
4493 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4494 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4495 ; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4496 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4497 ; X86-NEXT: retl # encoding: [0xc3]
4499 ; X64-LABEL: test_mask_sub_epi32_rrk_256:
4501 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4502 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4503 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4504 ; X64-NEXT: retq # encoding: [0xc3]
4505 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4506 ret <8 x i32> %res
4507 }
4509 define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
4510 ; X86-LABEL: test_mask_sub_epi32_rrkz_256:
4512 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4513 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4514 ; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4515 ; X86-NEXT: retl # encoding: [0xc3]
4517 ; X64-LABEL: test_mask_sub_epi32_rrkz_256:
4519 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4520 ; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4521 ; X64-NEXT: retq # encoding: [0xc3]
4522 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4523 ret <8 x i32> %res
4524 }
4526 define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4527 ; X86-LABEL: test_mask_sub_epi32_rm_256:
4529 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4530 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00]
4531 ; X86-NEXT: retl # encoding: [0xc3]
4533 ; X64-LABEL: test_mask_sub_epi32_rm_256:
4535 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07]
4536 ; X64-NEXT: retq # encoding: [0xc3]
4537 %b = load <8 x i32>, <8 x i32>* %ptr_b
4538 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4539 ret <8 x i32> %res
4540 }
4542 define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4543 ; X86-LABEL: test_mask_sub_epi32_rmk_256:
4545 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4546 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4547 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4548 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08]
4549 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4550 ; X86-NEXT: retl # encoding: [0xc3]
4552 ; X64-LABEL: test_mask_sub_epi32_rmk_256:
4554 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4555 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
4556 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4557 ; X64-NEXT: retq # encoding: [0xc3]
4558 %b = load <8 x i32>, <8 x i32>* %ptr_b
4559 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4560 ret <8 x i32> %res
4561 }
4563 define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
4564 ; X86-LABEL: test_mask_sub_epi32_rmkz_256:
4566 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4567 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4568 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4569 ; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00]
4570 ; X86-NEXT: retl # encoding: [0xc3]
4572 ; X64-LABEL: test_mask_sub_epi32_rmkz_256:
4574 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4575 ; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
4576 ; X64-NEXT: retq # encoding: [0xc3]
4577 %b = load <8 x i32>, <8 x i32>* %ptr_b
4578 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4579 ret <8 x i32> %res
4580 }
4582 define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4583 ; X86-LABEL: test_mask_sub_epi32_rmb_256:
4585 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4586 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00]
4587 ; X86-NEXT: retl # encoding: [0xc3]
4589 ; X64-LABEL: test_mask_sub_epi32_rmb_256:
4591 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
4592 ; X64-NEXT: retq # encoding: [0xc3]
4593 %q = load i32, i32* %ptr_b
4594 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4595 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4596 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4597 ret <8 x i32> %res
4598 }
4600 define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4601 ; X86-LABEL: test_mask_sub_epi32_rmbk_256:
4603 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4604 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4605 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4606 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08]
4607 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4608 ; X86-NEXT: retl # encoding: [0xc3]
4610 ; X64-LABEL: test_mask_sub_epi32_rmbk_256:
4612 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4613 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
4614 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4615 ; X64-NEXT: retq # encoding: [0xc3]
4616 %q = load i32, i32* %ptr_b
4617 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4618 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4619 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4620 ret <8 x i32> %res
4621 }
4623 define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
4624 ; X86-LABEL: test_mask_sub_epi32_rmbkz_256:
4626 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4628 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4629 ; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00]
4630 ; X86-NEXT: retl # encoding: [0xc3]
4632 ; X64-LABEL: test_mask_sub_epi32_rmbkz_256:
4634 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4635 ; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
4636 ; X64-NEXT: retq # encoding: [0xc3]
4637 %q = load i32, i32* %ptr_b
4638 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4639 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4640 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4641 ret <8 x i32> %res
4642 }
4644 declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
4646 define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4647 ; CHECK-LABEL: test_mask_add_epi32_rr_256:
4649 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
4650 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4651 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4652 ret <8 x i32> %res
4653 }
4655 define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
4656 ; X86-LABEL: test_mask_add_epi32_rrk_256:
4658 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4659 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4660 ; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
4661 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4662 ; X86-NEXT: retl # encoding: [0xc3]
4664 ; X64-LABEL: test_mask_add_epi32_rrk_256:
4666 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4667 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
4668 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4669 ; X64-NEXT: retq # encoding: [0xc3]
4670 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4671 ret <8 x i32> %res
4672 }
4674 define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
4675 ; X86-LABEL: test_mask_add_epi32_rrkz_256:
4677 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4678 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4679 ; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
4680 ; X86-NEXT: retl # encoding: [0xc3]
4682 ; X64-LABEL: test_mask_add_epi32_rrkz_256:
4684 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4685 ; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
4686 ; X64-NEXT: retq # encoding: [0xc3]
4687 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4688 ret <8 x i32> %res
4689 }
4691 define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4692 ; X86-LABEL: test_mask_add_epi32_rm_256:
4694 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4695 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00]
4696 ; X86-NEXT: retl # encoding: [0xc3]
4698 ; X64-LABEL: test_mask_add_epi32_rm_256:
4700 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
4701 ; X64-NEXT: retq # encoding: [0xc3]
4702 %b = load <8 x i32>, <8 x i32>* %ptr_b
4703 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4704 ret <8 x i32> %res
4705 }
4707 define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4708 ; X86-LABEL: test_mask_add_epi32_rmk_256:
4710 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4712 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4713 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08]
4714 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4715 ; X86-NEXT: retl # encoding: [0xc3]
4717 ; X64-LABEL: test_mask_add_epi32_rmk_256:
4719 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4720 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
4721 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4722 ; X64-NEXT: retq # encoding: [0xc3]
4723 %b = load <8 x i32>, <8 x i32>* %ptr_b
4724 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4725 ret <8 x i32> %res
4726 }
4728 define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
4729 ; X86-LABEL: test_mask_add_epi32_rmkz_256:
4731 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4732 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4733 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4734 ; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00]
4735 ; X86-NEXT: retl # encoding: [0xc3]
4737 ; X64-LABEL: test_mask_add_epi32_rmkz_256:
4739 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4740 ; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
4741 ; X64-NEXT: retq # encoding: [0xc3]
4742 %b = load <8 x i32>, <8 x i32>* %ptr_b
4743 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4744 ret <8 x i32> %res
4745 }
4747 define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
4748 ; X86-LABEL: test_mask_add_epi32_rmb_256:
4750 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4751 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00]
4752 ; X86-NEXT: retl # encoding: [0xc3]
4754 ; X64-LABEL: test_mask_add_epi32_rmb_256:
4756 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
4757 ; X64-NEXT: retq # encoding: [0xc3]
4758 %q = load i32, i32* %ptr_b
4759 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4760 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4761 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4762 ret <8 x i32> %res
4763 }
4765 define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4766 ; X86-LABEL: test_mask_add_epi32_rmbk_256:
4768 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4769 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4770 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4771 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08]
4772 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4773 ; X86-NEXT: retl # encoding: [0xc3]
4775 ; X64-LABEL: test_mask_add_epi32_rmbk_256:
4777 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4778 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
4779 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4780 ; X64-NEXT: retq # encoding: [0xc3]
4781 %q = load i32, i32* %ptr_b
4782 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4783 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4784 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4785 ret <8 x i32> %res
4786 }
4788 define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
4789 ; X86-LABEL: test_mask_add_epi32_rmbkz_256:
4791 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4792 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4793 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4794 ; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00]
4795 ; X86-NEXT: retl # encoding: [0xc3]
4797 ; X64-LABEL: test_mask_add_epi32_rmbkz_256:
4799 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4800 ; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
4801 ; X64-NEXT: retq # encoding: [0xc3]
4802 %q = load i32, i32* %ptr_b
4803 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
4804 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
4805 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4806 ret <8 x i32> %res
4807 }
4809 declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
4811 define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4812 ; X86-LABEL: test_mm512_maskz_add_ps_256:
4814 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4815 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4816 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
4817 ; X86-NEXT: retl # encoding: [0xc3]
4819 ; X64-LABEL: test_mm512_maskz_add_ps_256:
4821 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4822 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
4823 ; X64-NEXT: retq # encoding: [0xc3]
4824 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
4825 ret <8 x float> %res
4826 }
4828 define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
4829 ; X86-LABEL: test_mm512_mask_add_ps_256:
4831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4832 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4833 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
4834 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4835 ; X86-NEXT: retl # encoding: [0xc3]
4837 ; X64-LABEL: test_mm512_mask_add_ps_256:
4839 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4840 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
4841 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4842 ; X64-NEXT: retq # encoding: [0xc3]
4843 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
4844 ret <8 x float> %res
4845 }
4847 define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4848 ; CHECK-LABEL: test_mm512_add_ps_256:
4850 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
4851 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4852 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
4853 ret <8 x float> %res
4854 }
4855 declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
4857 define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4858 ; X86-LABEL: test_mm512_maskz_add_ps_128:
4860 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4861 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4862 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
4863 ; X86-NEXT: retl # encoding: [0xc3]
4865 ; X64-LABEL: test_mm512_maskz_add_ps_128:
4867 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4868 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
4869 ; X64-NEXT: retq # encoding: [0xc3]
4870 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
4871 ret <4 x float> %res
4872 }
4874 define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
4875 ; X86-LABEL: test_mm512_mask_add_ps_128:
4877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4878 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4879 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
4880 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4881 ; X86-NEXT: retl # encoding: [0xc3]
4883 ; X64-LABEL: test_mm512_mask_add_ps_128:
4885 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4886 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
4887 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4888 ; X64-NEXT: retq # encoding: [0xc3]
4889 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
4890 ret <4 x float> %res
4891 }
4893 define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4894 ; CHECK-LABEL: test_mm512_add_ps_128:
4896 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
4897 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4898 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
4899 ret <4 x float> %res
4900 }
4901 declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
4903 define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4904 ; X86-LABEL: test_mm512_maskz_sub_ps_256:
4906 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4907 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4908 ; X86-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
4909 ; X86-NEXT: retl # encoding: [0xc3]
4911 ; X64-LABEL: test_mm512_maskz_sub_ps_256:
4913 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4914 ; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
4915 ; X64-NEXT: retq # encoding: [0xc3]
4916 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
4917 ret <8 x float> %res
4918 }
4920 define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
4921 ; X86-LABEL: test_mm512_mask_sub_ps_256:
4923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4925 ; X86-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
4926 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4927 ; X86-NEXT: retl # encoding: [0xc3]
4929 ; X64-LABEL: test_mm512_mask_sub_ps_256:
4931 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4932 ; X64-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
4933 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
4934 ; X64-NEXT: retq # encoding: [0xc3]
4935 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
4936 ret <8 x float> %res
4937 }
4939 define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4940 ; CHECK-LABEL: test_mm512_sub_ps_256:
4942 ; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1]
4943 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4944 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
4945 ret <8 x float> %res
4946 }
4947 declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
4949 define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4950 ; X86-LABEL: test_mm512_maskz_sub_ps_128:
4952 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4953 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4954 ; X86-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
4955 ; X86-NEXT: retl # encoding: [0xc3]
4957 ; X64-LABEL: test_mm512_maskz_sub_ps_128:
4959 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4960 ; X64-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
4961 ; X64-NEXT: retq # encoding: [0xc3]
4962 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
4963 ret <4 x float> %res
4964 }
4966 define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
4967 ; X86-LABEL: test_mm512_mask_sub_ps_128:
4969 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4970 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4971 ; X86-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
4972 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4973 ; X86-NEXT: retl # encoding: [0xc3]
4975 ; X64-LABEL: test_mm512_mask_sub_ps_128:
4977 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4978 ; X64-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
4979 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
4980 ; X64-NEXT: retq # encoding: [0xc3]
4981 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
4982 ret <4 x float> %res
4983 }
4985 define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4986 ; CHECK-LABEL: test_mm512_sub_ps_128:
4988 ; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
4989 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4990 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
4991 ret <4 x float> %res
4992 }
4993 declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
4995 define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
4996 ; X86-LABEL: test_mm512_maskz_mul_ps_256:
4998 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4999 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5000 ; X86-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
5001 ; X86-NEXT: retl # encoding: [0xc3]
5003 ; X64-LABEL: test_mm512_maskz_mul_ps_256:
5005 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5006 ; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
5007 ; X64-NEXT: retq # encoding: [0xc3]
5008 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
5009 ret <8 x float> %res
5010 }
5012 define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
5013 ; X86-LABEL: test_mm512_mask_mul_ps_256:
5015 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5016 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5017 ; X86-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
5018 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5019 ; X86-NEXT: retl # encoding: [0xc3]
5021 ; X64-LABEL: test_mm512_mask_mul_ps_256:
5023 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5024 ; X64-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
5025 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5026 ; X64-NEXT: retq # encoding: [0xc3]
5027 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
5028 ret <8 x float> %res
5029 }
5031 define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5032 ; CHECK-LABEL: test_mm512_mul_ps_256:
5034 ; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1]
5035 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5036 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
5037 ret <8 x float> %res
5038 }
5039 declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
5041 define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5042 ; X86-LABEL: test_mm512_maskz_mul_ps_128:
5044 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5045 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5046 ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
5047 ; X86-NEXT: retl # encoding: [0xc3]
5049 ; X64-LABEL: test_mm512_maskz_mul_ps_128:
5051 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5052 ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
5053 ; X64-NEXT: retq # encoding: [0xc3]
5054 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
5055 ret <4 x float> %res
5056 }
5058 define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
5059 ; X86-LABEL: test_mm512_mask_mul_ps_128:
5061 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5062 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5063 ; X86-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
5064 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5065 ; X86-NEXT: retl # encoding: [0xc3]
5067 ; X64-LABEL: test_mm512_mask_mul_ps_128:
5069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5070 ; X64-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
5071 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5072 ; X64-NEXT: retq # encoding: [0xc3]
5073 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
5074 ret <4 x float> %res
5075 }
5077 define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5078 ; CHECK-LABEL: test_mm512_mul_ps_128:
5080 ; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
5081 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5082 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
5083 ret <4 x float> %res
5084 }
5085 declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
5087 define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5088 ; X86-LABEL: test_mm512_maskz_div_ps_256:
5090 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5091 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5092 ; X86-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
5093 ; X86-NEXT: retl # encoding: [0xc3]
5095 ; X64-LABEL: test_mm512_maskz_div_ps_256:
5097 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5098 ; X64-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
5099 ; X64-NEXT: retq # encoding: [0xc3]
5100 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
5101 ret <8 x float> %res
5102 }
5104 define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
5105 ; X86-LABEL: test_mm512_mask_div_ps_256:
5107 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5108 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5109 ; X86-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
5110 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5111 ; X86-NEXT: retl # encoding: [0xc3]
5113 ; X64-LABEL: test_mm512_mask_div_ps_256:
5115 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5116 ; X64-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
5117 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
5118 ; X64-NEXT: retq # encoding: [0xc3]
5119 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
5120 ret <8 x float> %res
5121 }
5123 define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
5124 ; CHECK-LABEL: test_mm512_div_ps_256:
5126 ; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1]
5127 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5128 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
5129 ret <8 x float> %res
5130 }
5131 declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
5133 define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5134 ; X86-LABEL: test_mm512_maskz_div_ps_128:
5136 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5137 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5138 ; X86-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
5139 ; X86-NEXT: retl # encoding: [0xc3]
5141 ; X64-LABEL: test_mm512_maskz_div_ps_128:
5143 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5144 ; X64-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
5145 ; X64-NEXT: retq # encoding: [0xc3]
5146 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
5147 ret <4 x float> %res
5148 }
5150 define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
5151 ; X86-LABEL: test_mm512_mask_div_ps_128:
5153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5154 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5155 ; X86-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
5156 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5157 ; X86-NEXT: retl # encoding: [0xc3]
5159 ; X64-LABEL: test_mm512_mask_div_ps_128:
5161 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5162 ; X64-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
5163 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
5164 ; X64-NEXT: retq # encoding: [0xc3]
5165 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
5166 ret <4 x float> %res
5167 }
5169 define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
5170 ; CHECK-LABEL: test_mm512_div_ps_128:
5172 ; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
5173 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
5174 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
5175 ret <4 x float> %res
5176 }
5177 declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
5179 declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
5181 define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
5182 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
5184 ; X86-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
5185 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5186 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5187 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5188 ; X86-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
5189 ; X86-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
5190 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
5191 ; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
5192 ; X86-NEXT: retl # encoding: [0xc3]
5194 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
5196 ; X64-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
5197 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5198 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5199 ; X64-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
5200 ; X64-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
5201 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
5202 ; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
5203 ; X64-NEXT: retq # encoding: [0xc3]
5204 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
5205 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
5206 %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
5207 %res3 = fadd <8 x float> %res, %res1
5208 %res4 = fadd <8 x float> %res2, %res3
5209 ret <8 x float> %res4
5212 declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
5214 define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
5215 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
5217 ; X86-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
5218 ; X86-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
5219 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5220 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5221 ; X86-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
5222 ; X86-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
5223 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
5224 ; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
5225 ; X86-NEXT: retl # encoding: [0xc3]
5227 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
5229 ; X64-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
5230 ; X64-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
5231 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5232 ; X64-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
5233 ; X64-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
5234 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
5235 ; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
5236 ; X64-NEXT: retq # encoding: [0xc3]
5237 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
5238 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
5239 %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
5240 %res3 = fadd <4 x double> %res, %res1
5241 %res4 = fadd <4 x double> %res2, %res3
5242 ret <4 x double> %res4
5245 declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
5247 define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
5248 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
5250 ; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5251 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5252 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5253 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5254 ; X86-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
5255 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5256 ; X86-NEXT: retl # encoding: [0xc3]
5258 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
5260 ; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5261 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5262 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5263 ; X64-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
5264 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5265 ; X64-NEXT: retq # encoding: [0xc3]
5266 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
5267 %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
5268 %res2 = add <8 x i32> %res, %res1
5269 ret <8 x i32> %res2
5272 declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
5274 define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
5275 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
5277 ; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5278 ; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5279 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5280 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5281 ; X86-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
5282 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5283 ; X86-NEXT: retl # encoding: [0xc3]
5285 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
5287 ; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
5288 ; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
5289 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5290 ; X64-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
5291 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5292 ; X64-NEXT: retq # encoding: [0xc3]
5293 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
5294 %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
5295 %res2 = add <4 x i64> %res, %res1
5296 ret <4 x i64> %res2
5299 declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
5301 define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
5302 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
5304 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
5305 ; X86-NEXT: # xmm3 = xmm0[1],xmm1[0]
5306 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5307 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5308 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
5309 ; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
5310 ; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
5311 ; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
5312 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
5313 ; X86-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
5314 ; X86-NEXT: retl # encoding: [0xc3]
5316 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
5318 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
5319 ; X64-NEXT: # xmm3 = xmm0[1],xmm1[0]
5320 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5321 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
5322 ; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
5323 ; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
5324 ; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
5325 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
5326 ; X64-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
5327 ; X64-NEXT: retq # encoding: [0xc3]
5328 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4)
5329 %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1)
5330 %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4)
5331 %res3 = fadd <2 x double> %res, %res1
5332 %res4 = fadd <2 x double> %res2, %res3
5333 ret <2 x double> %res4
5336 declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
5338 define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
5339 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
5341 ; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
5342 ; X86-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5343 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5344 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5345 ; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
5346 ; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5347 ; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
5348 ; X86-NEXT: retl # encoding: [0xc3]
5350 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
5352 ; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
5353 ; X64-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5354 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5355 ; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
5356 ; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
5357 ; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
5358 ; X64-NEXT: retq # encoding: [0xc3]
5359 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4)
5360 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1)
5361 %res2 = fadd <4 x double> %res, %res1
5362 ret <4 x double> %res2
5365 declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
5367 define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
5368 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
5370 ; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
5371 ; X86-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
5372 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5373 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5374 ; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
5375 ; X86-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
5376 ; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
5377 ; X86-NEXT: retl # encoding: [0xc3]
5379 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
5381 ; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
5382 ; X64-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
5383 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5384 ; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
5385 ; X64-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
5386 ; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
5387 ; X64-NEXT: retq # encoding: [0xc3]
5388 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
5389 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
5390 %res2 = fadd <4 x float> %res, %res1
5391 ret <4 x float> %res2
5394 declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
5396 define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
5397 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
5399 ; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
5400 ; X86-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5401 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5402 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5403 ; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
5404 ; X86-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5405 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
5406 ; X86-NEXT: retl # encoding: [0xc3]
5408 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
5410 ; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
5411 ; X64-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5412 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5413 ; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
5414 ; X64-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
5415 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
5416 ; X64-NEXT: retq # encoding: [0xc3]
5417 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
5418 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
5419 %res2 = fadd <8 x float> %res, %res1
5420 ret <8 x float> %res2
5423 declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5425 define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5426 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5428 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5429 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5430 ; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5431 ; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
5432 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5433 ; X86-NEXT: retl # encoding: [0xc3]
5435 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5437 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5438 ; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5439 ; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
5440 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5441 ; X64-NEXT: retq # encoding: [0xc3]
5442 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5443 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5444 %res2 = add <4 x i32> %res, %res1
5445 ret <4 x i32> %res2
5448 declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5450 define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5451 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
5453 ; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
5454 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5455 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5456 ; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
5457 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5458 ; X86-NEXT: retl # encoding: [0xc3]
5460 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
5462 ; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
5463 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5464 ; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
5465 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5466 ; X64-NEXT: retq # encoding: [0xc3]
5467 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5468 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5469 %res2 = add <8 x i32> %res, %res1
5470 ret <8 x i32> %res2
5473 declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5475 define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5476 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
5478 ; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
5479 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5480 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5481 ; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
5482 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5483 ; X86-NEXT: retl # encoding: [0xc3]
5485 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
5487 ; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
5488 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5489 ; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
5490 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5491 ; X64-NEXT: retq # encoding: [0xc3]
5492 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5493 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5494 %res2 = add <2 x i64> %res, %res1
5495 ret <2 x i64> %res2
5498 declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5500 define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5501 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
5503 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5504 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5505 ; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
5506 ; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
5507 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5508 ; X86-NEXT: retl # encoding: [0xc3]
5510 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
5512 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5513 ; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
5514 ; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
5515 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5516 ; X64-NEXT: retq # encoding: [0xc3]
5517 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5518 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5519 %res2 = add <4 x i64> %res, %res1
5520 ret <4 x i64> %res2
5523 declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5525 define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5526 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
5528 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5529 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5530 ; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
5531 ; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
5532 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5533 ; X86-NEXT: retl # encoding: [0xc3]
5535 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
5537 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5538 ; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
5539 ; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
5540 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5541 ; X64-NEXT: retq # encoding: [0xc3]
5542 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5543 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5544 %res2 = add <4 x i32> %res, %res1
5545 ret <4 x i32> %res2
5548 declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5550 define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5551 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
5553 ; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
5554 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5555 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5556 ; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
5557 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5558 ; X86-NEXT: retl # encoding: [0xc3]
5560 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
5562 ; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
5563 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5564 ; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
5565 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5566 ; X64-NEXT: retq # encoding: [0xc3]
5567 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5568 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5569 %res2 = add <8 x i32> %res, %res1
5570 ret <8 x i32> %res2
5573 declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5575 define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5576 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
5578 ; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
5579 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5580 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5581 ; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
5582 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5583 ; X86-NEXT: retl # encoding: [0xc3]
5585 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
5587 ; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
5588 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5589 ; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
5590 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5591 ; X64-NEXT: retq # encoding: [0xc3]
5592 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5593 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5594 %res2 = add <2 x i64> %res, %res1
5595 ret <2 x i64> %res2
5598 declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5600 define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5601 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
5603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5604 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5605 ; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
5606 ; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
5607 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5608 ; X86-NEXT: retl # encoding: [0xc3]
5610 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
5612 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5613 ; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
5614 ; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
5615 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5616 ; X64-NEXT: retq # encoding: [0xc3]
5617 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5618 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5619 %res2 = add <4 x i64> %res, %res1
5620 ret <4 x i64> %res2
5623 declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5625 define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5626 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128:
5628 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5629 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5630 ; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
5631 ; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
5632 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5633 ; X86-NEXT: retl # encoding: [0xc3]
5635 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128:
5637 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5638 ; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
5639 ; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
5640 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5641 ; X64-NEXT: retq # encoding: [0xc3]
5642 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5643 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5644 %res2 = add <4 x i32> %res, %res1
5645 ret <4 x i32> %res2
5648 declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5650 define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5651 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256:
5653 ; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
5654 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5655 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5656 ; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
5657 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5658 ; X86-NEXT: retl # encoding: [0xc3]
5660 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256:
5662 ; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
5663 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5664 ; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
5665 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5666 ; X64-NEXT: retq # encoding: [0xc3]
5667 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5668 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5669 %res2 = add <8 x i32> %res, %res1
5670 ret <8 x i32> %res2
5673 declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5675 define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5676 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128:
5678 ; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
5679 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5680 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5681 ; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
5682 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5683 ; X86-NEXT: retl # encoding: [0xc3]
5685 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128:
5687 ; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
5688 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5689 ; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
5690 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5691 ; X64-NEXT: retq # encoding: [0xc3]
5692 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5693 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5694 %res2 = add <2 x i64> %res, %res1
5695 ret <2 x i64> %res2
5698 declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5700 define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5701 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256:
5703 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5704 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5705 ; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
5706 ; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
5707 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5708 ; X86-NEXT: retl # encoding: [0xc3]
5710 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256:
5712 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5713 ; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
5714 ; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
5715 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5716 ; X64-NEXT: retq # encoding: [0xc3]
5717 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5718 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5719 %res2 = add <4 x i64> %res, %res1
5720 ret <4 x i64> %res2
5723 declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5725 define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5726 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128:
5728 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5729 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5730 ; X86-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
5731 ; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
5732 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5733 ; X86-NEXT: retl # encoding: [0xc3]
5735 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128:
5737 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5738 ; X64-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
5739 ; X64-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
5740 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5741 ; X64-NEXT: retq # encoding: [0xc3]
5742 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
5743 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
5744 %res2 = add <4 x i32> %res, %res1
5745 ret <4 x i32> %res2
5748 declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
5750 define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5751 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256:
5753 ; X86-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
5754 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5755 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5756 ; X86-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
5757 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5758 ; X86-NEXT: retl # encoding: [0xc3]
5760 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256:
5762 ; X64-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
5763 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5764 ; X64-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
5765 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
5766 ; X64-NEXT: retq # encoding: [0xc3]
5767 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
5768 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
5769 %res2 = add <8 x i32> %res, %res1
5770 ret <8 x i32> %res2
5773 declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5775 define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5776 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128:
5778 ; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
5779 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5780 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5781 ; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
5782 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5783 ; X86-NEXT: retl # encoding: [0xc3]
5785 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128:
5787 ; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
5788 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5789 ; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
5790 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
5791 ; X64-NEXT: retq # encoding: [0xc3]
5792 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5793 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5794 %res2 = add <2 x i64> %res, %res1
5795 ret <2 x i64> %res2
5798 declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
5800 define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
5801 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256:
5803 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5804 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5805 ; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
5806 ; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
5807 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5808 ; X86-NEXT: retl # encoding: [0xc3]
5810 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256:
5812 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5813 ; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
5814 ; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
5815 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5816 ; X64-NEXT: retq # encoding: [0xc3]
5817 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
5818 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
5819 %res2 = add <4 x i64> %res, %res1
5820 ret <4 x i64> %res2
5823 declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5825 define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5826 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128:
5828 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
5829 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5830 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5831 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
5832 ; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
5833 ; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
5834 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
5835 ; X86-NEXT: retl # encoding: [0xc3]
5837 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128:
5839 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
5840 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5841 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
5842 ; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
5843 ; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
5844 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
5845 ; X64-NEXT: retq # encoding: [0xc3]
5846 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
5847 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
5848 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
5849 %res3 = add <2 x i64> %res, %res1
5850 %res4 = add <2 x i64> %res3, %res2
5851 ret <2 x i64> %res4
5854 declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
5856 define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
5857 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256:
5859 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
5860 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5861 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5862 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
5863 ; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
5864 ; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
5865 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5866 ; X86-NEXT: retl # encoding: [0xc3]
5868 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256:
5870 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
5871 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5872 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
5873 ; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
5874 ; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
5875 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
5876 ; X64-NEXT: retq # encoding: [0xc3]
5877 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
5878 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
5879 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
5880 %res3 = add <4 x i64> %res, %res1
5881 %res4 = add <4 x i64> %res3, %res2
5882 ret <4 x i64> %res4
5885 declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5887 define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5888 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128:
5890 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
5891 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5892 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5893 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
5894 ; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
5895 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
5896 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5897 ; X86-NEXT: retl # encoding: [0xc3]
5899 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128:
5901 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
5902 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5903 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
5904 ; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
5905 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
5906 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5907 ; X64-NEXT: retq # encoding: [0xc3]
5908 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5909 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5910 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5911 %res3 = add <4 x i32> %res, %res1
5912 %res4 = add <4 x i32> %res3, %res2
5913 ret <4 x i32> %res4
5916 declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5918 define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5919 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256:
5921 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
5922 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5923 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5924 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
5925 ; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
5926 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
5927 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5928 ; X86-NEXT: retl # encoding: [0xc3]
5930 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256:
5932 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
5933 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5934 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
5935 ; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
5936 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
5937 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5938 ; X64-NEXT: retq # encoding: [0xc3]
5939 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
5940 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
5941 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
5942 %res3 = add <8 x i32> %res, %res1
5943 %res4 = add <8 x i32> %res2, %res3
5944 ret <8 x i32> %res4
5947 declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5949 define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
5950 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_128:
5952 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
5953 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5954 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5955 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
5956 ; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
5957 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
5958 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5959 ; X86-NEXT: retl # encoding: [0xc3]
5961 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_128:
5963 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
5964 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5965 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
5966 ; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
5967 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
5968 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
5969 ; X64-NEXT: retq # encoding: [0xc3]
5970 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
5971 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
5972 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
5973 %res3 = add <4 x i32> %res, %res1
5974 %res4 = add <4 x i32> %res3, %res2
5975 ret <4 x i32> %res4
5978 declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
5980 define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
5981 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_256:
5983 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
5984 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5985 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5986 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
5987 ; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
5988 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
5989 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
5990 ; X86-NEXT: retl # encoding: [0xc3]
5992 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_256:
5994 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
5995 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5996 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
5997 ; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
5998 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
5999 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6000 ; X64-NEXT: retq # encoding: [0xc3]
6001 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
6002 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6003 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
6004 %res3 = add <8 x i32> %res, %res1
6005 %res4 = add <8 x i32> %res3, %res2
6006 ret <8 x i32> %res4
6009 declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6011 define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6012 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_128:
6014 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
6015 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6016 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6017 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
6018 ; X86-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
6019 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6020 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6021 ; X86-NEXT: retl # encoding: [0xc3]
6023 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_128:
6025 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
6026 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6027 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
6028 ; X64-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
6029 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6030 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6031 ; X64-NEXT: retq # encoding: [0xc3]
6032 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6033 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6034 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6035 %res3 = add <4 x i32> %res, %res1
6036 %res4 = add <4 x i32> %res3, %res2
6037 ret <4 x i32> %res4
6040 declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
6042 define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6043 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_256:
6045 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
6046 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6047 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6048 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
6049 ; X86-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
6050 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6051 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6052 ; X86-NEXT: retl # encoding: [0xc3]
6054 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_256:
6056 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
6057 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6058 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
6059 ; X64-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
6060 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6061 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6062 ; X64-NEXT: retq # encoding: [0xc3]
6063 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
6064 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6065 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
6066 %res3 = add <8 x i32> %res, %res1
6067 %res4 = add <8 x i32> %res3, %res2
6068 ret <8 x i32> %res4
6071 declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
6073 define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6074 ; X86-LABEL: test_int_x86_avx512_mask_psll_q_256:
6076 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
6077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6078 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6079 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
6080 ; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
6081 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6082 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6083 ; X86-NEXT: retl # encoding: [0xc3]
6085 ; X64-LABEL: test_int_x86_avx512_mask_psll_q_256:
6087 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
6088 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6089 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
6090 ; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
6091 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6092 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6093 ; X64-NEXT: retq # encoding: [0xc3]
6094 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
6095 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6096 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
6097 %res3 = add <4 x i64> %res, %res1
6098 %res4 = add <4 x i64> %res3, %res2
6099 ret <4 x i64> %res4
6100 }
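; The remaining tests follow the same pattern: each intrinsic is called with a
; merge-masked destination, a zeroed destination, and an all-ones (no-op) mask,
; and the three results are added so every masking form is checked (the psrav
; constant-operand test below is the one exception and makes a single unmasked
; call).
;
; Logical right shift by immediate count (psrl.qi.* and psrl.di.*).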
6102 declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)
6104 define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
6105 ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
6107 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
6108 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6109 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6110 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
6111 ; X86-NEXT: vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
6112 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6113 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6114 ; X86-NEXT: retl # encoding: [0xc3]
6116 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
6118 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
6119 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6120 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
6121 ; X64-NEXT: vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
6122 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6123 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6124 ; X64-NEXT: retq # encoding: [0xc3]
6125 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
6126 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
6127 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
6128 %res3 = add <2 x i64> %res, %res1
6129 %res4 = add <2 x i64> %res2, %res3
6130 ret <2 x i64> %res4
6131 }
6133 declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)
6135 define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
6136 ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
6138 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
6139 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6140 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6141 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
6142 ; X86-NEXT: vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
6143 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6144 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6145 ; X86-NEXT: retl # encoding: [0xc3]
6147 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
6149 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
6150 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6151 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
6152 ; X64-NEXT: vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
6153 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6154 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6155 ; X64-NEXT: retq # encoding: [0xc3]
6156 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
6157 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
6158 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
6159 %res3 = add <4 x i64> %res, %res1
6160 %res4 = add <4 x i64> %res2, %res3
6161 ret <4 x i64> %res4
6162 }
6164 declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)
6166 define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6167 ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128:
6169 ; X86-NEXT: vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
6170 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6171 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6172 ; X86-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
6173 ; X86-NEXT: vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
6174 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6175 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6176 ; X86-NEXT: retl # encoding: [0xc3]
6178 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128:
6180 ; X64-NEXT: vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
6181 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6182 ; X64-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
6183 ; X64-NEXT: vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
6184 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6185 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6186 ; X64-NEXT: retq # encoding: [0xc3]
6187 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6188 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6189 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6190 %res3 = add <4 x i32> %res, %res1
6191 %res4 = add <4 x i32> %res2, %res3
6192 ret <4 x i32> %res4
6193 }
6195 declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)
6197 define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6198 ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256:
6200 ; X86-NEXT: vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
6201 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6202 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6203 ; X86-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
6204 ; X86-NEXT: vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
6205 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6206 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6207 ; X86-NEXT: retl # encoding: [0xc3]
6209 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256:
6211 ; X64-NEXT: vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
6212 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6213 ; X64-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
6214 ; X64-NEXT: vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
6215 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6216 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6217 ; X64-NEXT: retq # encoding: [0xc3]
6218 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6219 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6220 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6221 %res3 = add <8 x i32> %res, %res1
6222 %res4 = add <8 x i32> %res2, %res3
6223 ret <8 x i32> %res4
6224 }
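; Logical left shift by immediate count (psll.di.*).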
6226 declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)
6228 define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6229 ; X86-LABEL: test_int_x86_avx512_mask_psll_di_128:
6231 ; X86-NEXT: vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
6232 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6233 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6234 ; X86-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6235 ; X86-NEXT: vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
6236 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6237 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6238 ; X86-NEXT: retl # encoding: [0xc3]
6240 ; X64-LABEL: test_int_x86_avx512_mask_psll_di_128:
6242 ; X64-NEXT: vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
6243 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6244 ; X64-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6245 ; X64-NEXT: vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
6246 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6247 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6248 ; X64-NEXT: retq # encoding: [0xc3]
6249 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
6250 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
6251 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
6252 %res3 = add <4 x i32> %res, %res1
6253 %res4 = add <4 x i32> %res3, %res2
6254 ret <4 x i32> %res4
6255 }
6257 declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)
6259 define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
6260 ; X86-LABEL: test_int_x86_avx512_mask_psll_di_256:
6262 ; X86-NEXT: vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
6263 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6264 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6265 ; X86-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
6266 ; X86-NEXT: vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
6267 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6268 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6269 ; X86-NEXT: retl # encoding: [0xc3]
6271 ; X64-LABEL: test_int_x86_avx512_mask_psll_di_256:
6273 ; X64-NEXT: vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
6274 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6275 ; X64-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
6276 ; X64-NEXT: vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
6277 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6278 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6279 ; X64-NEXT: retq # encoding: [0xc3]
6280 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
6281 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
6282 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
6283 %res3 = add <8 x i32> %res, %res1
6284 %res4 = add <8 x i32> %res3, %res2
6285 ret <8 x i32> %res4
6286 }
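; Variable per-element logical right shifts (psrlv*).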
6288 declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6290 define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6291 ; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di:
6293 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
6294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6296 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
6297 ; X86-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
6298 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6299 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6300 ; X86-NEXT: retl # encoding: [0xc3]
6302 ; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di:
6304 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
6305 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6306 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
6307 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
6308 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6309 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6310 ; X64-NEXT: retq # encoding: [0xc3]
6311 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6312 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6313 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6314 %res3 = add <2 x i64> %res, %res1
6315 %res4 = add <2 x i64> %res3, %res2
6316 ret <2 x i64> %res4
6317 }
6319 declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6321 define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6322 ; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di:
6324 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
6325 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6326 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6327 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
6328 ; X86-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
6329 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6330 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6331 ; X86-NEXT: retl # encoding: [0xc3]
6333 ; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di:
6335 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
6336 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6337 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
6338 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
6339 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6340 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6341 ; X64-NEXT: retq # encoding: [0xc3]
6342 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6343 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6344 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6345 %res3 = add <4 x i64> %res, %res1
6346 %res4 = add <4 x i64> %res3, %res2
6347 ret <4 x i64> %res4
6348 }
6350 declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6352 define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6353 ; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si:
6355 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
6356 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6357 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6358 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
6359 ; X86-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
6360 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6361 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6362 ; X86-NEXT: retl # encoding: [0xc3]
6364 ; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si:
6366 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
6367 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6368 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
6369 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
6370 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6371 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6372 ; X64-NEXT: retq # encoding: [0xc3]
6373 %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6374 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6375 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6376 %res3 = add <4 x i32> %res, %res1
6377 %res4 = add <4 x i32> %res3, %res2
6378 ret <4 x i32> %res4
6379 }
6381 declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6383 define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6384 ; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si:
6386 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
6387 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6388 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6389 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
6390 ; X86-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
6391 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6392 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6393 ; X86-NEXT: retl # encoding: [0xc3]
6395 ; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si:
6397 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
6398 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6399 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
6400 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
6401 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6402 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6403 ; X64-NEXT: retq # encoding: [0xc3]
6404 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6405 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6406 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6407 %res3 = add <8 x i32> %res, %res1
6408 %res4 = add <8 x i32> %res3, %res2
6409 ret <8 x i32> %res4
6410 }
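; Variable per-element arithmetic right shifts (psrav*).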
6412 declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6414 define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6415 ; X86-LABEL: test_int_x86_avx512_mask_psrav4_si:
6417 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
6418 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6419 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6420 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
6421 ; X86-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
6422 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6423 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6424 ; X86-NEXT: retl # encoding: [0xc3]
6426 ; X64-LABEL: test_int_x86_avx512_mask_psrav4_si:
6428 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
6429 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6430 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
6431 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
6432 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6433 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6434 ; X64-NEXT: retq # encoding: [0xc3]
6435 %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6436 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6437 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6438 %res3 = add <4 x i32> %res, %res1
6439 %res4 = add <4 x i32> %res3, %res2
6440 ret <4 x i32> %res4
6441 }
6443 declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6445 define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6446 ; X86-LABEL: test_int_x86_avx512_mask_psrav8_si:
6448 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
6449 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6450 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6451 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
6452 ; X86-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
6453 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6454 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6455 ; X86-NEXT: retl # encoding: [0xc3]
6457 ; X64-LABEL: test_int_x86_avx512_mask_psrav8_si:
6459 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
6460 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6461 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
6462 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
6463 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6464 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6465 ; X64-NEXT: retq # encoding: [0xc3]
6466 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6467 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6468 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6469 %res3 = add <8 x i32> %res, %res1
6470 %res4 = add <8 x i32> %res3, %res2
6471 ret <8 x i32> %res4
6472 }
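; psrav8.si applied to constant vectors: the checks show both constants being
; loaded from the constant pool and vpsravd executed at run time, i.e. the
; result is not folded to a constant.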
6474 define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
6475 ; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
6477 ; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
6478 ; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
6479 ; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
6480 ; X86-NEXT: vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
6481 ; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
6482 ; X86-NEXT: retl # encoding: [0xc3]
6484 ; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
6486 ; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
6487 ; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
6488 ; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
6489 ; X64-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
6490 ; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
6491 ; X64-NEXT: retq # encoding: [0xc3]
6492 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
6493 ret <8 x i32> %res
6494 }
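; Variable per-element logical left shifts (psllv*).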
6496 declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
6498 define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
6499 ; X86-LABEL: test_int_x86_avx512_mask_psllv2_di:
6501 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
6502 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6503 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6504 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
6505 ; X86-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
6506 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6507 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6508 ; X86-NEXT: retl # encoding: [0xc3]
6510 ; X64-LABEL: test_int_x86_avx512_mask_psllv2_di:
6512 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
6513 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6514 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
6515 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
6516 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
6517 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
6518 ; X64-NEXT: retq # encoding: [0xc3]
6519 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6520 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6521 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6522 %res3 = add <2 x i64> %res, %res1
6523 %res4 = add <2 x i64> %res3, %res2
6524 ret <2 x i64> %res4
6525 }
6527 declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6529 define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6530 ; X86-LABEL: test_int_x86_avx512_mask_psllv4_di:
6532 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
6533 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6534 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6535 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
6536 ; X86-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
6537 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6538 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6539 ; X86-NEXT: retl # encoding: [0xc3]
6541 ; X64-LABEL: test_int_x86_avx512_mask_psllv4_di:
6543 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
6544 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6545 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
6546 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
6547 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
6548 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
6549 ; X64-NEXT: retq # encoding: [0xc3]
6550 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6551 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6552 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6553 %res3 = add <4 x i64> %res, %res1
6554 %res4 = add <4 x i64> %res3, %res2
6555 ret <4 x i64> %res4
6556 }
6558 declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6560 define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6561 ; X86-LABEL: test_int_x86_avx512_mask_psllv4_si:
6563 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
6564 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6565 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6566 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
6567 ; X86-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
6568 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6569 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6570 ; X86-NEXT: retl # encoding: [0xc3]
6572 ; X64-LABEL: test_int_x86_avx512_mask_psllv4_si:
6574 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
6575 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6576 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
6577 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
6578 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
6579 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6580 ; X64-NEXT: retq # encoding: [0xc3]
6581 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6582 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6583 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6584 %res3 = add <4 x i32> %res, %res1
6585 %res4 = add <4 x i32> %res3, %res2
6586 ret <4 x i32> %res4
6587 }
6589 declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6591 define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6592 ; X86-LABEL: test_int_x86_avx512_mask_psllv8_si:
6594 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
6595 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6596 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6597 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
6598 ; X86-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
6599 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6600 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6601 ; X86-NEXT: retl # encoding: [0xc3]
6603 ; X64-LABEL: test_int_x86_avx512_mask_psllv8_si:
6605 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
6606 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6607 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
6608 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
6609 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
6610 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
6611 ; X64-NEXT: retq # encoding: [0xc3]
6612 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6613 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6614 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6615 %res3 = add <8 x i32> %res, %res1
6616 %res4 = add <8 x i32> %res3, %res2
6617 ret <8 x i32> %res4
6618 }
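; Masked zero-extension intrinsics (pmovzxb.*, pmovzxd.q, pmovzxw.*).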
6620 declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
6622 define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6623 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
6625 ; X86-NEXT: vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
6626 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6628 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6629 ; X86-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
6630 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6631 ; X86-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
6632 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6633 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6634 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6635 ; X86-NEXT: retl # encoding: [0xc3]
6637 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
6639 ; X64-NEXT: vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
6640 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6641 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6642 ; X64-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
6643 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6644 ; X64-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
6645 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6646 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6647 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6648 ; X64-NEXT: retq # encoding: [0xc3]
6649 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
6650 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
6651 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
6652 %res3 = add <4 x i32> %res, %res1
6653 %res4 = add <4 x i32> %res3, %res2
6654 ret <4 x i32> %res4
6655 }
6657 declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
6659 define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
6660 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
6662 ; X86-NEXT: vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
6663 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6664 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6665 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6666 ; X86-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
6667 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6668 ; X86-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
6669 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6670 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6671 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6672 ; X86-NEXT: retl # encoding: [0xc3]
6674 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
6676 ; X64-NEXT: vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
6677 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6678 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6679 ; X64-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
6680 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6681 ; X64-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
6682 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
6683 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6684 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6685 ; X64-NEXT: retq # encoding: [0xc3]
6686 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
6687 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
6688 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
6689 %res3 = add <8 x i32> %res, %res1
6690 %res4 = add <8 x i32> %res3, %res2
6691 ret <8 x i32> %res4
6692 }
6694 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
6696 define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
6697 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
6699 ; X86-NEXT: vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
6700 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6701 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6702 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6703 ; X86-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
6704 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6705 ; X86-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
6706 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6707 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6708 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6709 ; X86-NEXT: retl # encoding: [0xc3]
6711 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
6713 ; X64-NEXT: vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
6714 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6715 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6716 ; X64-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
6717 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6718 ; X64-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
6719 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
6720 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6721 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6722 ; X64-NEXT: retq # encoding: [0xc3]
6723 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
6724 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
6725 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
6726 %res3 = add <2 x i64> %res, %res1
6727 %res4 = add <2 x i64> %res3, %res2
6728 ret <2 x i64> %res4
6729 }
6731 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
6733 define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
6734 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
6736 ; X86-NEXT: vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
6737 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6738 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6739 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6740 ; X86-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
6741 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6742 ; X86-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
6743 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6744 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6745 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6746 ; X86-NEXT: retl # encoding: [0xc3]
6748 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
6750 ; X64-NEXT: vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
6751 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6752 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6753 ; X64-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
6754 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6755 ; X64-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
6756 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
6757 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6758 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6759 ; X64-NEXT: retq # encoding: [0xc3]
6760 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
6761 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
6762 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
6763 %res3 = add <4 x i64> %res, %res1
6764 %res4 = add <4 x i64> %res3, %res2
6765 ret <4 x i64> %res4
6766 }
6768 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
6770 define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
6771 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
6773 ; X86-NEXT: vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
6774 ; X86-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero
6775 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6776 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6777 ; X86-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
6778 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
6779 ; X86-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
6780 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
6781 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6782 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6783 ; X86-NEXT: retl # encoding: [0xc3]
6785 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
6787 ; X64-NEXT: vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
6788 ; X64-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero
6789 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6790 ; X64-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
6791 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
6792 ; X64-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
6793 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
6794 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6795 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6796 ; X64-NEXT: retq # encoding: [0xc3]
6797 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
6798 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
6799 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
6800 %res3 = add <2 x i64> %res, %res1
6801 %res4 = add <2 x i64> %res3, %res2
6802 ret <2 x i64> %res4
6803 }
6805 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
6807 define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
6808 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
6810 ; X86-NEXT: vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
6811 ; X86-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6812 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6813 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6814 ; X86-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
6815 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6816 ; X86-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
6817 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6818 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6819 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6820 ; X86-NEXT: retl # encoding: [0xc3]
6822 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
6824 ; X64-NEXT: vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
6825 ; X64-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6826 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6827 ; X64-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
6828 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6829 ; X64-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
6830 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6831 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6832 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6833 ; X64-NEXT: retq # encoding: [0xc3]
6834 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
6835 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
6836 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
6837 %res3 = add <4 x i64> %res, %res1
6838 %res4 = add <4 x i64> %res3, %res2
6839 ret <4 x i64> %res4
6840 }
6842 declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
6844 define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
6845 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
6847 ; X86-NEXT: vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
6848 ; X86-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6849 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6850 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6851 ; X86-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
6852 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6853 ; X86-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
6854 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6855 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6856 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6857 ; X86-NEXT: retl # encoding: [0xc3]
6859 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
6861 ; X64-NEXT: vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
6862 ; X64-NEXT: # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6863 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6864 ; X64-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
6865 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6866 ; X64-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
6867 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
6868 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
6869 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6870 ; X64-NEXT: retq # encoding: [0xc3]
6871 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
6872 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
6873 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
6874 %res3 = add <4 x i32> %res, %res1
6875 %res4 = add <4 x i32> %res3, %res2
6876 ret <4 x i32> %res4
6877 }
6879 declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
6881 define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
6882 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
6884 ; X86-NEXT: vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
6885 ; X86-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6886 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6887 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6888 ; X86-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
6889 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6890 ; X86-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
6891 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6892 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6893 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6894 ; X86-NEXT: retl # encoding: [0xc3]
6896 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
6898 ; X64-NEXT: vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
6899 ; X64-NEXT: # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6900 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6901 ; X64-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
6902 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6903 ; X64-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
6904 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6905 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
6906 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
6907 ; X64-NEXT: retq # encoding: [0xc3]
6908 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
6909 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
6910 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
6911 %res3 = add <8 x i32> %res, %res1
6912 %res4 = add <8 x i32> %res3, %res2
6913 ret <8 x i32> %res4
6914 }
6916 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
6918 define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
6919 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
6921 ; X86-NEXT: vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
6922 ; X86-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6925 ; X86-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
6926 ; X86-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6927 ; X86-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
6928 ; X86-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6929 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6930 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6931 ; X86-NEXT: retl # encoding: [0xc3]
6933 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
6935 ; X64-NEXT: vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
6936 ; X64-NEXT: # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6937 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6938 ; X64-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
6939 ; X64-NEXT: # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6940 ; X64-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
6941 ; X64-NEXT: # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
6942 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
6943 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
6944 ; X64-NEXT: retq # encoding: [0xc3]
6945 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
6946 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
6947 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
6948 %res3 = add <2 x i64> %res, %res1
6949 %res4 = add <2 x i64> %res3, %res2
6950 ret <2 x i64> %res4
6951 }
6953 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
6955 define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
6956 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
6958 ; X86-NEXT: vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
6959 ; X86-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6960 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6961 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6962 ; X86-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
6963 ; X86-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6964 ; X86-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
6965 ; X86-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6966 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6967 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6968 ; X86-NEXT: retl # encoding: [0xc3]
6970 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
6972 ; X64-NEXT: vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
6973 ; X64-NEXT: # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6974 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
6975 ; X64-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
6976 ; X64-NEXT: # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6977 ; X64-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
6978 ; X64-NEXT: # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
6979 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
6980 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
6981 ; X64-NEXT: retq # encoding: [0xc3]
6982 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
6983 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
6984 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
6985 %res3 = add <4 x i64> %res, %res1
6986 %res4 = add <4 x i64> %res3, %res2
6987 ret <4 x i64> %res4
6988 }
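; The tests below exercise the masked sign-extension intrinsics (llvm.x86.avx512.mask.pmovsxb.* and .pmovsxw.*): each checks the merge-masked, zero-masked, and unmasked forms and sums the three results.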
6990 declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
6992 define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
6993 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
6995 ; X86-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
6996 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
6997 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6998 ; X86-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
6999 ; X86-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
7000 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7001 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7002 ; X86-NEXT: retl # encoding: [0xc3]
7004 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
7006 ; X64-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
7007 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7008 ; X64-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
7009 ; X64-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
7010 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7011 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7012 ; X64-NEXT: retq # encoding: [0xc3]
7013 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
7014 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
7015 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
7016 %res3 = add <4 x i32> %res, %res1
7017 %res4 = add <4 x i32> %res3, %res2
7018 ret <4 x i32> %res4
7019 }
7021 declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
7023 define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
7024 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
7026 ; X86-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
7027 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7028 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7029 ; X86-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
7030 ; X86-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
7031 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7032 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7033 ; X86-NEXT: retl # encoding: [0xc3]
7035 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
7037 ; X64-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
7038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7039 ; X64-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
7040 ; X64-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
7041 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7042 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7043 ; X64-NEXT: retq # encoding: [0xc3]
7044 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
7045 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
7046 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
7047 %res3 = add <8 x i32> %res, %res1
7048 %res4 = add <8 x i32> %res3, %res2
7049 ret <8 x i32> %res4
7050 }
7052 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
7054 define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
7055 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
7057 ; X86-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
7058 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7059 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7060 ; X86-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
7061 ; X86-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
7062 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7063 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7064 ; X86-NEXT: retl # encoding: [0xc3]
7066 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
7068 ; X64-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
7069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7070 ; X64-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
7071 ; X64-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
7072 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7073 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7074 ; X64-NEXT: retq # encoding: [0xc3]
7075 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
7076 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
7077 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
7078 %res3 = add <2 x i64> %res, %res1
7079 %res4 = add <2 x i64> %res3, %res2
7080 ret <2 x i64> %res4
7081 }
7083 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
7085 define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
7086 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
7088 ; X86-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
7089 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7090 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7091 ; X86-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
7092 ; X86-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
7093 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7094 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7095 ; X86-NEXT: retl # encoding: [0xc3]
7097 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
7099 ; X64-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
7100 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7101 ; X64-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
7102 ; X64-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
7103 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7104 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7105 ; X64-NEXT: retq # encoding: [0xc3]
7106 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
7107 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
7108 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
7109 %res3 = add <4 x i64> %res, %res1
7110 %res4 = add <4 x i64> %res3, %res2
7111 ret <4 x i64> %res4
7112 }
7114 declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
7116 define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
7117 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
7119 ; X86-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
7120 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7121 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7122 ; X86-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
7123 ; X86-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
7124 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7125 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7126 ; X86-NEXT: retl # encoding: [0xc3]
7128 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
7130 ; X64-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
7131 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7132 ; X64-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
7133 ; X64-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
7134 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
7135 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
7136 ; X64-NEXT: retq # encoding: [0xc3]
7137 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
7138 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
7139 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
7140 %res3 = add <4 x i32> %res, %res1
7141 %res4 = add <4 x i32> %res3, %res2
7142 ret <4 x i32> %res4
7143 }
7145 declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
7147 define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
7148 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
7150 ; X86-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
7151 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7152 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7153 ; X86-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
7154 ; X86-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
7155 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7156 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7157 ; X86-NEXT: retl # encoding: [0xc3]
7159 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
7161 ; X64-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
7162 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7163 ; X64-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
7164 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
7165 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
7166 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7167 ; X64-NEXT: retq # encoding: [0xc3]
7168 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
7169 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
7170 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
7171 %res3 = add <8 x i32> %res, %res1
7172 %res4 = add <8 x i32> %res3, %res2
7173 ret <8 x i32> %res4
7174 }
7176 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
7178 define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
7179 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
7181 ; X86-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
7182 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7183 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7184 ; X86-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
7185 ; X86-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
7186 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7187 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7188 ; X86-NEXT: retl # encoding: [0xc3]
7190 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
7192 ; X64-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
7193 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7194 ; X64-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
7195 ; X64-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
7196 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7197 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7198 ; X64-NEXT: retq # encoding: [0xc3]
7199 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
7200 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
7201 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
7202 %res3 = add <2 x i64> %res, %res1
7203 %res4 = add <2 x i64> %res3, %res2
7204 ret <2 x i64> %res4
7205 }
7207 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
7209 define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
7210 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
7212 ; X86-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
7213 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7214 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7215 ; X86-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
7216 ; X86-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
7217 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7218 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7219 ; X86-NEXT: retl # encoding: [0xc3]
7221 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
7223 ; X64-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
7224 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7225 ; X64-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
7226 ; X64-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
7227 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7228 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7229 ; X64-NEXT: retq # encoding: [0xc3]
7230 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
7231 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
7232 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
7233 %res3 = add <4 x i64> %res, %res1
7234 %res4 = add <4 x i64> %res3, %res2
7235 ret <4 x i64> %res4
7236 }
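; The tests below cover the masked 64-bit arithmetic right shifts: VPSRAQ by an xmm count (psra.q.*), by an immediate (psra.qi.*), and per-element variable shifts (psrav.q.*).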
7238 declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
7240 define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
7241 ; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
7243 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
7244 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7245 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7246 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
7247 ; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
7248 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7249 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7250 ; X86-NEXT: retl # encoding: [0xc3]
7252 ; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
7254 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
7255 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7256 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
7257 ; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
7258 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7259 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7260 ; X64-NEXT: retq # encoding: [0xc3]
7261 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
7262 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
7263 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
7264 %res3 = add <2 x i64> %res, %res1
7265 %res4 = add <2 x i64> %res3, %res2
7266 ret <2 x i64> %res4
7267 }
7269 declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
7271 define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
7272 ; X86-LABEL: test_int_x86_avx512_mask_psra_q_256:
7274 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
7275 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7276 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7277 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
7278 ; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
7279 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7280 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7281 ; X86-NEXT: retl # encoding: [0xc3]
7283 ; X64-LABEL: test_int_x86_avx512_mask_psra_q_256:
7285 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
7286 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7287 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
7288 ; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
7289 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7290 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7291 ; X64-NEXT: retq # encoding: [0xc3]
7292 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
7293 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
7294 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
7295 %res3 = add <4 x i64> %res, %res1
7296 %res4 = add <4 x i64> %res3, %res2
7297 ret <4 x i64> %res4
7298 }
7300 declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)
7302 define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
7303 ; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128:
7305 ; X86-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
7306 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
7307 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7308 ; X86-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
7309 ; X86-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
7310 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7311 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7312 ; X86-NEXT: retl # encoding: [0xc3]
7314 ; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128:
7316 ; X64-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
7317 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7318 ; X64-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
7319 ; X64-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
7320 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
7321 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
7322 ; X64-NEXT: retq # encoding: [0xc3]
7323 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
7324 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
7325 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
7326 %res3 = add <2 x i64> %res, %res1
7327 %res4 = add <2 x i64> %res3, %res2
7328 ret <2 x i64> %res4
7329 }
7331 declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)
7333 define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
7334 ; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256:
7336 ; X86-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
7337 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
7338 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7339 ; X86-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
7340 ; X86-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
7341 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7342 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7343 ; X86-NEXT: retl # encoding: [0xc3]
7345 ; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256:
7347 ; X64-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
7348 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7349 ; X64-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
7350 ; X64-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
7351 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
7352 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
7353 ; X64-NEXT: retq # encoding: [0xc3]
7354 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
7355 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
7356 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
7357 %res3 = add <4 x i64> %res, %res1
7358 %res4 = add <4 x i64> %res3, %res2
7359 ret <4 x i64> %res4
7360 }
7362 declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
7364 define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
7365 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128:
7367 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
7368 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7369 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7370 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
7371 ; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
7372 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7373 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7374 ; X86-NEXT: retl # encoding: [0xc3]
7376 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128:
7378 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
7379 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7380 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
7381 ; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
7382 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
7383 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
7384 ; X64-NEXT: retq # encoding: [0xc3]
7385 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
7386 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
7387 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
7388 %res3 = add <2 x i64> %res, %res1
7389 %res4 = add <2 x i64> %res3, %res2
7390 ret <2 x i64> %res4
7391 }
7393 define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
7394 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
7396 ; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295]
7397 ; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
7398 ; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
7399 ; X86-NEXT: vpsravq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7400 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
7401 ; X86-NEXT: retl # encoding: [0xc3]
7403 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
7405 ; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607]
7406 ; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
7407 ; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
7408 ; X64-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7409 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
7410 ; X64-NEXT: retq # encoding: [0xc3]
7411 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
7412 ret <2 x i64> %res
7413 }
7415 declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
7417 define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
7418 ; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256:
7420 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
7421 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7422 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7423 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
7424 ; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
7425 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7426 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7427 ; X86-NEXT: retl # encoding: [0xc3]
7429 ; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256:
7431 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
7432 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7433 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
7434 ; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
7435 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
7436 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
7437 ; X64-NEXT: retq # encoding: [0xc3]
7438 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
7439 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
7440 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
7441 %res3 = add <4 x i64> %res, %res1
7442 %res4 = add <4 x i64> %res3, %res2
7443 ret <4 x i64> %res4
7444 }
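; The tests below cover the masked integer-to-double conversions VCVTDQ2PD and VCVTUDQ2PD, adding the masked result to the unmasked one.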
7446 declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
7448 define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
7449 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
7451 ; X86-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
7452 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7453 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7454 ; X86-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
7455 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7456 ; X86-NEXT: retl # encoding: [0xc3]
7458 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
7460 ; X64-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
7461 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7462 ; X64-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
7463 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7464 ; X64-NEXT: retq # encoding: [0xc3]
7465 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
7466 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
7467 %res2 = fadd <2 x double> %res, %res1
7468 ret <2 x double> %res2
7469 }
7471 declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
7473 define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
7474 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
7476 ; X86-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
7477 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7478 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7479 ; X86-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
7480 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7481 ; X86-NEXT: retl # encoding: [0xc3]
7483 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
7485 ; X64-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
7486 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7487 ; X64-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
7488 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7489 ; X64-NEXT: retq # encoding: [0xc3]
7490 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
7491 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
7492 %res2 = fadd <4 x double> %res, %res1
7493 ret <4 x double> %res2
7494 }
7496 declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
7498 define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
7499 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
7501 ; X86-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
7502 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7503 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7504 ; X86-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
7505 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7506 ; X86-NEXT: retl # encoding: [0xc3]
7508 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
7510 ; X64-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
7511 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7512 ; X64-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
7513 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
7514 ; X64-NEXT: retq # encoding: [0xc3]
7515 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
7516 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
7517 %res2 = fadd <2 x double> %res, %res1
7518 ret <2 x double> %res2
7519 }
7521 declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
7523 define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
7524 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
7526 ; X86-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
7527 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7528 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7529 ; X86-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
7530 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7531 ; X86-NEXT: retl # encoding: [0xc3]
7533 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
7535 ; X64-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
7536 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7537 ; X64-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
7538 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
7539 ; X64-NEXT: retq # encoding: [0xc3]
7540 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
7541 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
7542 %res2 = fadd <4 x double> %res, %res1
7543 ret <4 x double> %res2
7544 }
7546 declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
7548 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
7549 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_128:
7551 ; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7552 ; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7553 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7554 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7555 ; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
7556 ; X86-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
7557 ; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
7558 ; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
7559 ; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
7560 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
7561 ; X86-NEXT: retl # encoding: [0xc3]
7563 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_128:
7565 ; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7566 ; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7567 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7568 ; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
7569 ; X64-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
7570 ; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
7571 ; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
7572 ; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
7573 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
7574 ; X64-NEXT: retq # encoding: [0xc3]
7575 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4)
7576 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1)
7577 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer,i8 %x4)
7578 %res3 = add <4 x i32> %res, %res1
7579 %res4 = add <4 x i32> %res3, %res2
7580 ret <4 x i32> %res4
7581 }
7583 declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
7585 define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
7586 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_256:
7588 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7589 ; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7590 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7591 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7592 ; X86-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
7593 ; X86-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
7594 ; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
7595 ; X86-NEXT: retl # encoding: [0xc3]
7597 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_256:
7599 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7600 ; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7601 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7602 ; X64-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
7603 ; X64-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
7604 ; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
7605 ; X64-NEXT: retq # encoding: [0xc3]
7606 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4)
7607 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1)
7608 %res2 = add <8 x i32> %res, %res1
7609 ret <8 x i32> %res2
7610 }
7612 declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
7614 define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
7615 ; X86-LABEL: test_int_x86_avx512_mask_valign_q_128:
7617 ; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7618 ; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7619 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7620 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7621 ; X86-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
7622 ; X86-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
7623 ; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
7624 ; X86-NEXT: retl # encoding: [0xc3]
7626 ; X64-LABEL: test_int_x86_avx512_mask_valign_q_128:
7628 ; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
7629 ; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
7630 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7631 ; X64-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
7632 ; X64-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
7633 ; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
7634 ; X64-NEXT: retq # encoding: [0xc3]
7635 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4)
7636 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1)
7637 %res2 = add <2 x i64> %res, %res1
7638 ret <2 x i64> %res2
7639 }
7641 declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
7643 define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
7644 ; X86-LABEL: test_int_x86_avx512_mask_valign_q_256:
7646 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7647 ; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7648 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7649 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7650 ; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
7651 ; X86-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
7652 ; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
7653 ; X86-NEXT: retl # encoding: [0xc3]
7655 ; X64-LABEL: test_int_x86_avx512_mask_valign_q_256:
7657 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
7658 ; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
7659 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7660 ; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
7661 ; X64-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
7662 ; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
7663 ; X64-NEXT: retq # encoding: [0xc3]
7664 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4)
7665 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1)
7666 %res2 = add <4 x i64> %res, %res1
7667 ret <4 x i64> %res2
7668 }
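; The tests below cover the masked VPERMILPD/VPERMILPS variable permutes, followed by the vextractf32x4/vinsertf32x4 subvector tests.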
7670 declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
7672 define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
7673 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
7675 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
7676 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7677 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7678 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
7679 ; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
7680 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
7681 ; X86-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
7682 ; X86-NEXT: retl # encoding: [0xc3]
7684 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
7686 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
7687 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7688 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
7689 ; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
7690 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
7691 ; X64-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
7692 ; X64-NEXT: retq # encoding: [0xc3]
7693 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
7694 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
7695 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
7696 %res3 = fadd <4 x double> %res, %res1
7697 %res4 = fadd <4 x double> %res2, %res3
7698 ret <4 x double> %res4
7699 }
7701 declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
7703 define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
7704 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
7706 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
7707 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7708 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7709 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
7710 ; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
7711 ; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
7712 ; X86-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
7713 ; X86-NEXT: retl # encoding: [0xc3]
7715 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
7717 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
7718 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7719 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
7720 ; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
7721 ; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
7722 ; X64-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
7723 ; X64-NEXT: retq # encoding: [0xc3]
7724 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
7725 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
7726 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
7727 %res3 = fadd <2 x double> %res, %res1
7728 %res4 = fadd <2 x double> %res3, %res2
7729 ret <2 x double> %res4
7730 }
7732 declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
7734 define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
7735 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
7737 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
7738 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7739 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7740 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
7741 ; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
7742 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
7743 ; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
7744 ; X86-NEXT: retl # encoding: [0xc3]
7746 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
7748 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
7749 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7750 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
7751 ; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
7752 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
7753 ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
7754 ; X64-NEXT: retq # encoding: [0xc3]
7755 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
7756 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
7757 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
7758 %res3 = fadd <8 x float> %res, %res1
7759 %res4 = fadd <8 x float> %res3, %res2
7760 ret <8 x float> %res4
7761 }
7763 declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
7765 define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
7766 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
7768 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
7769 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7770 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7771 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
7772 ; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
7773 ; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7774 ; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
7775 ; X86-NEXT: retl # encoding: [0xc3]
7777 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
7779 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
7780 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7781 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
7782 ; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
7783 ; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7784 ; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
7785 ; X64-NEXT: retq # encoding: [0xc3]
7786 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
7787 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
7788 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
7789 %res3 = fadd <4 x float> %res, %res1
7790 %res4 = fadd <4 x float> %res2, %res3
7791 ret <4 x float> %res4
7792 }
7794 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
7796 define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
7797 ; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
7799 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
7800 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7801 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7802 ; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
7803 ; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
7804 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
7805 ; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7806 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
7807 ; X86-NEXT: retl # encoding: [0xc3]
7809 ; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
7811 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
7812 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7813 ; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
7814 ; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
7815 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
7816 ; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
7817 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
7818 ; X64-NEXT: retq # encoding: [0xc3]
7819 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
7820 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
7821 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
7822 %res3 = fadd <4 x float> %res, %res1
7823 %res4 = fadd <4 x float> %res2, %res3
7824 ret <4 x float> %res4
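; The insert tests mirror the extracts: a 128-bit value is placed into the upper
; lane (immediate 1), and only the unmasked form is compressed to vinsertf128 /
; vinserti128.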
7827 declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
7829 define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
7830 ; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
7832 ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
7833 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7834 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7835 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
7836 ; X86-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
7837 ; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
7838 ; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
7839 ; X86-NEXT: retl # encoding: [0xc3]
7841 ; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
7843 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
7844 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7845 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
7846 ; X64-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
7847 ; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
7848 ; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
7849 ; X64-NEXT: retq # encoding: [0xc3]
7850 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
7851 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
7852 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
7853 %res3 = fadd <8 x float> %res, %res1
7854 %res4 = fadd <8 x float> %res2, %res3
7855 ret <8 x float> %res4
7858 declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
7860 define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
7861 ; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
7863 ; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
7864 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7865 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7866 ; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
7867 ; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
7868 ; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
7869 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7870 ; X86-NEXT: retl # encoding: [0xc3]
7872 ; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
7874 ; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
7875 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7876 ; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
7877 ; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
7878 ; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
7879 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7880 ; X64-NEXT: retq # encoding: [0xc3]
7882 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
7883 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
7884 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
7885 %res3 = add <8 x i32> %res, %res1
7886 %res4 = add <8 x i32> %res2, %res3
7887 ret <8 x i32> %res4
7888 }
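; The vmaxps/vminps tests that follow cover the three masking forms of the 128- and
; 256-bit intrinsics: zero-masking ({k1} {z}), merge-masking into the passthru
; operand, and an all-ones mask, which lowers to the plain VEX instruction.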
7890 define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7891 ; X86-LABEL: test_mm512_maskz_max_ps_256:
7893 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7894 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7895 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
7896 ; X86-NEXT: retl # encoding: [0xc3]
7898 ; X64-LABEL: test_mm512_maskz_max_ps_256:
7900 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7901 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
7902 ; X64-NEXT: retq # encoding: [0xc3]
7903 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
7904 ret <8 x float> %res
7907 define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
7908 ; X86-LABEL: test_mm512_mask_max_ps_256:
7910 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7911 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7912 ; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
7913 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
7914 ; X86-NEXT: retl # encoding: [0xc3]
7916 ; X64-LABEL: test_mm512_mask_max_ps_256:
7918 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7919 ; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
7920 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
7921 ; X64-NEXT: retq # encoding: [0xc3]
7922 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
7923 ret <8 x float> %res
7926 define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7927 ; CHECK-LABEL: test_mm512_max_ps_256:
7929 ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
7930 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7931 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
7932 ret <8 x float> %res
7934 declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
7936 define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
7937 ; X86-LABEL: test_mm512_maskz_max_ps_128:
7939 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7940 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7941 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
7942 ; X86-NEXT: retl # encoding: [0xc3]
7944 ; X64-LABEL: test_mm512_maskz_max_ps_128:
7946 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7947 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
7948 ; X64-NEXT: retq # encoding: [0xc3]
7949 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
7950 ret <4 x float> %res
7953 define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
7954 ; X86-LABEL: test_mm512_mask_max_ps_128:
7956 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7957 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7958 ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
7959 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
7960 ; X86-NEXT: retl # encoding: [0xc3]
7962 ; X64-LABEL: test_mm512_mask_max_ps_128:
7964 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7965 ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
7966 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
7967 ; X64-NEXT: retq # encoding: [0xc3]
7968 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
7969 ret <4 x float> %res
7972 define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
7973 ; CHECK-LABEL: test_mm512_max_ps_128:
7975 ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
7976 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
7977 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
7978 ret <4 x float> %res
7980 declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
7982 define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
7983 ; X86-LABEL: test_mm512_maskz_min_ps_256:
7985 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7986 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7987 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
7988 ; X86-NEXT: retl # encoding: [0xc3]
7990 ; X64-LABEL: test_mm512_maskz_min_ps_256:
7992 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7993 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
7994 ; X64-NEXT: retq # encoding: [0xc3]
7995 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
7996 ret <8 x float> %res
7999 define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
8000 ; X86-LABEL: test_mm512_mask_min_ps_256:
8002 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8003 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8004 ; X86-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
8005 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
8006 ; X86-NEXT: retl # encoding: [0xc3]
8008 ; X64-LABEL: test_mm512_mask_min_ps_256:
8010 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8011 ; X64-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
8012 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
8013 ; X64-NEXT: retq # encoding: [0xc3]
8014 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
8015 ret <8 x float> %res
8018 define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
8019 ; CHECK-LABEL: test_mm512_min_ps_256:
8021 ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
8022 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8023 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
8024 ret <8 x float> %res
8026 declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
8028 define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
8029 ; X86-LABEL: test_mm512_maskz_min_ps_128:
8031 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8032 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8033 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
8034 ; X86-NEXT: retl # encoding: [0xc3]
8036 ; X64-LABEL: test_mm512_maskz_min_ps_128:
8038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8039 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
8040 ; X64-NEXT: retq # encoding: [0xc3]
8041 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
8042 ret <4 x float> %res
8045 define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
8046 ; X86-LABEL: test_mm512_mask_min_ps_128:
8048 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8049 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8050 ; X86-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
8051 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
8052 ; X86-NEXT: retl # encoding: [0xc3]
8054 ; X64-LABEL: test_mm512_mask_min_ps_128:
8056 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8057 ; X64-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
8058 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
8059 ; X64-NEXT: retq # encoding: [0xc3]
8060 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
8061 ret <4 x float> %res
8064 define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
8065 ; CHECK-LABEL: test_mm512_min_ps_128:
8067 ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
8068 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8069 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
8070 ret <4 x float> %res
8072 declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
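; Each cmp/ucmp test below evaluates all eight comparison predicates (0-7) and packs
; the eight i8 mask results into a <8 x i8> via vpinsrw. Predicate 3 ("false")
; constant-folds to zero, so element 3 is never inserted, and predicate 7 ("true")
; folds to the mask itself: 255 (or 15 for four-element compares) when unmasked,
; otherwise the incoming mask restricted to the valid lanes.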
8074 define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
8075 ; CHECK-LABEL: test_cmp_d_256:
8077 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
8078 ; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc8]
8079 ; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
8080 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
8081 ; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x05]
8082 ; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe9]
8083 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8084 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8085 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8086 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8087 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8088 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8089 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8090 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8091 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8092 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8093 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8094 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8095 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8096 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
8097 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8098 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8099 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8100 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
8101 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8102 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
8103 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8104 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
8105 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8106 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
8107 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8108 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
8109 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8110 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
8111 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8112 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
8113 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8114 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
8115 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8116 ret <8 x i8> %vec7
8117 }
8119 define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
8120 ; X86-LABEL: test_mask_cmp_d_256:
8122 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
8123 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8124 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8125 ; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
8126 ; X86-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
8127 ; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8128 ; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
8129 ; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
8130 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
8131 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8132 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
8133 ; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
8134 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
8135 ; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
8136 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
8137 ; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
8138 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
8139 ; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
8140 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
8141 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
8142 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
8143 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8144 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8145 ; X86-NEXT: retl # encoding: [0xc3]
8147 ; X64-LABEL: test_mask_cmp_d_256:
8149 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8150 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8151 ; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
8152 ; X64-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
8153 ; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8154 ; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
8155 ; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
8156 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8157 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8158 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8159 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8160 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8161 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8162 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8163 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8164 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8165 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8166 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8167 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8168 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8169 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
8170 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8171 ; X64-NEXT: retq # encoding: [0xc3]
8172 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
8173 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8174 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
8175 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8176 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
8177 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8178 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
8179 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8180 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
8181 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8182 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
8183 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8184 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
8185 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8186 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
8187 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8188 ret <8 x i8> %vec7
8189 }
8191 declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
8193 define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
8194 ; CHECK-LABEL: test_ucmp_d_256:
8196 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
8197 ; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
8198 ; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
8199 ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
8200 ; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x05]
8201 ; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x06]
8202 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8203 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8204 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8205 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8206 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8207 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8208 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8209 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8210 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8211 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8212 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8213 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8214 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8215 ; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
8216 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8217 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8218 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8219 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
8220 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8221 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
8222 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8223 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
8224 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8225 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
8226 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8227 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
8228 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8229 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
8230 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8231 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
8232 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8233 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
8234 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8235 ret <8 x i8> %vec7
8236 }
8238 define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
8239 ; X86-LABEL: test_mask_ucmp_d_256:
8241 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
8242 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8243 ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8244 ; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
8245 ; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
8246 ; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8247 ; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
8248 ; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
8249 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
8250 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8251 ; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
8252 ; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
8253 ; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
8254 ; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
8255 ; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
8256 ; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
8257 ; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
8258 ; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
8259 ; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
8260 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
8261 ; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
8262 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8263 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8264 ; X86-NEXT: retl # encoding: [0xc3]
8266 ; X64-LABEL: test_mask_ucmp_d_256:
8268 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8269 ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
8270 ; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
8271 ; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
8272 ; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
8273 ; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
8274 ; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
8275 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8276 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8277 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8278 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8279 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8280 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8281 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8282 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8283 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8284 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8285 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8286 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8287 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8288 ; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
8289 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8290 ; X64-NEXT: retq # encoding: [0xc3]
8291 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
8292 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8293 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
8294 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8295 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
8296 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8297 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
8298 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8299 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
8300 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8301 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
8302 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8303 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
8304 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8305 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
8306 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8307 ret <8 x i8> %vec7
8308 }
8310 declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
8312 define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
8313 ; CHECK-LABEL: test_cmp_q_256:
8315 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
8316 ; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
8317 ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
8318 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
8319 ; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
8320 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
8321 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8322 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8323 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8324 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8325 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8326 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8327 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8328 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8329 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8330 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8331 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8332 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8333 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8334 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8335 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8336 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8337 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8338 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
8339 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8340 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
8341 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8342 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
8343 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8344 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
8345 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8346 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
8347 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8348 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
8349 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8350 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
8351 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8352 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
8353 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8354 ret <8 x i8> %vec7
8355 }
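; In the masked four-element compares, the kshiftlw $12 / kshiftrw $12 pair clears
; the upper twelve bits of the input mask, leaving only the four valid lanes for the
; element-7 ("true") result.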
8357 define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
8358 ; X86-LABEL: test_mask_cmp_q_256:
8360 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8361 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8362 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8363 ; X86-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
8364 ; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
8365 ; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8366 ; X86-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
8367 ; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
8368 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8369 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8370 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8371 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8372 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8373 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8374 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8375 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8376 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8377 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8378 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8379 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8380 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8381 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8382 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8383 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8384 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8385 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8386 ; X86-NEXT: retl # encoding: [0xc3]
8388 ; X64-LABEL: test_mask_cmp_q_256:
8390 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8391 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8392 ; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
8393 ; X64-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
8394 ; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8395 ; X64-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
8396 ; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
8397 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8398 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8399 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8400 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8401 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8402 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8403 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8404 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8405 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8406 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8407 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8408 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8409 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8410 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8411 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8412 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8413 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8414 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8415 ; X64-NEXT: retq # encoding: [0xc3]
8416 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
8417 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8418 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
8419 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8420 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
8421 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8422 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
8423 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8424 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
8425 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8426 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
8427 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8428 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
8429 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8430 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
8431 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8432 ret <8 x i8> %vec7
8433 }
8435 declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
8437 define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
8438 ; CHECK-LABEL: test_ucmp_q_256:
8440 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
8441 ; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
8442 ; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
8443 ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
8444 ; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
8445 ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
8446 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8447 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8448 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8449 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8450 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8451 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8452 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8453 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8454 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8455 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8456 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8457 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8458 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8459 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8460 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8461 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8462 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8463 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
8464 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8465 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
8466 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8467 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
8468 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8469 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
8470 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8471 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
8472 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8473 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
8474 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8475 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
8476 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8477 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
8478 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8479 ret <8 x i8> %vec7
8480 }
8482 define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
8483 ; X86-LABEL: test_mask_ucmp_q_256:
8485 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8486 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8487 ; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8488 ; X86-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
8489 ; X86-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
8490 ; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8491 ; X86-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
8492 ; X86-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
8493 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8494 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8495 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8496 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8497 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8498 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8499 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8500 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8501 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8502 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8503 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8504 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8505 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8506 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8507 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8508 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8509 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8510 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8511 ; X86-NEXT: retl # encoding: [0xc3]
8513 ; X64-LABEL: test_mask_ucmp_q_256:
8515 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8516 ; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
8517 ; X64-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
8518 ; X64-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
8519 ; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
8520 ; X64-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
8521 ; X64-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
8522 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8523 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8524 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8525 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8526 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8527 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8528 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8529 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8530 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8531 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8532 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8533 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8534 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8535 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8536 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8537 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8538 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8539 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
8540 ; X64-NEXT: retq # encoding: [0xc3]
8541 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
8542 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8543 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
8544 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8545 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
8546 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8547 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
8548 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8549 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
8550 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8551 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
8552 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8553 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
8554 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8555 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
8556 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8557 ret <8 x i8> %vec7
8558 }
8560 declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
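; The 128-bit integer compares repeat the same pattern with xmm operands; no ymm
; register is touched, so no vzeroupper is emitted before returning.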
8562 define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
8563 ; CHECK-LABEL: test_cmp_d_128:
8565 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
8566 ; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
8567 ; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
8568 ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
8569 ; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
8570 ; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
8571 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8572 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8573 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8574 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8575 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8576 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8577 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8578 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8579 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8580 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8581 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8582 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8583 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8584 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8585 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8586 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8587 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
8588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8589 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
8590 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8591 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
8592 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8593 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
8594 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8595 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
8596 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8597 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
8598 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8599 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
8600 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8601 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
8602 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8603 ret <8 x i8> %vec7
8604 }
8606 define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
8607 ; X86-LABEL: test_mask_cmp_d_128:
8609 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8610 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8611 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8612 ; X86-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
8613 ; X86-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
8614 ; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8615 ; X86-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
8616 ; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
8617 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8618 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8619 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8620 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8621 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8622 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8623 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8624 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8625 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8626 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8627 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8628 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8629 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8630 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8631 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8632 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8633 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8634 ; X86-NEXT: retl # encoding: [0xc3]
8636 ; X64-LABEL: test_mask_cmp_d_128:
8638 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8639 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8640 ; X64-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
8641 ; X64-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
8642 ; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8643 ; X64-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
8644 ; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
8645 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8646 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8647 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8648 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8649 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8650 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8651 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8652 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8653 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8654 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8655 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8656 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8657 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8658 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8659 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8660 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8661 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8662 ; X64-NEXT: retq # encoding: [0xc3]
8663 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
8664 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8665 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
8666 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8667 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
8668 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8669 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
8670 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8671 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
8672 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8673 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
8674 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8675 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
8676 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8677 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
8678 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8679 ret <8 x i8> %vec7
8680 }
8682 declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
8684 define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
8685 ; CHECK-LABEL: test_ucmp_d_128:
8687 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
8688 ; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
8689 ; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
8690 ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
8691 ; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
8692 ; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
8693 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8694 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8695 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8696 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8697 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8698 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8699 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8700 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8701 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8702 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8703 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8704 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8705 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8706 ; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
8707 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8708 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8709 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
8710 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8711 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
8712 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8713 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
8714 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8715 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
8716 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8717 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
8718 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8719 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
8720 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8721 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
8722 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8723 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
8724 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8725 ret <8 x i8> %vec7
8726 }
8728 define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
8729 ; X86-LABEL: test_mask_ucmp_d_128:
8731 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8732 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8733 ; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8734 ; X86-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
8735 ; X86-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
8736 ; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8737 ; X86-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
8738 ; X86-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
8739 ; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8740 ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8741 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8742 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8743 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8744 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8745 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8746 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8747 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8748 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8749 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8750 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8751 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8752 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8753 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8754 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8755 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8756 ; X86-NEXT: retl # encoding: [0xc3]
8758 ; X64-LABEL: test_mask_ucmp_d_128:
8760 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8761 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
8762 ; X64-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
8763 ; X64-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
8764 ; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
8765 ; X64-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
8766 ; X64-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
8767 ; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
8768 ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
8769 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8770 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8771 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8772 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8773 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8774 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8775 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8776 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8777 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8778 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8779 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8780 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8781 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8782 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8783 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8784 ; X64-NEXT: retq # encoding: [0xc3]
8785 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
8786 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8787 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
8788 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8789 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
8790 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8791 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
8792 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8793 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
8794 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8795 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
8796 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8797 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
8798 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8799 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
8800 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8801 ret <8 x i8> %vec7
8802 }
8804 declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
8806 define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
8807 ; CHECK-LABEL: test_cmp_q_128:
8809 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
8810 ; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
8811 ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
8812 ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
8813 ; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
8814 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
8815 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8816 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8817 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8818 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8819 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8820 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8821 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8822 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8823 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8824 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8825 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8826 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8827 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8828 ; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
8829 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8830 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8831 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
8832 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8833 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
8834 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8835 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
8836 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8837 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
8838 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8839 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
8840 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8841 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
8842 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8843 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
8844 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8845 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
8846 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8847 ret <8 x i8> %vec7
8848 }
8850 define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
8851 ; X86-LABEL: test_mask_cmp_q_128:
8853 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8854 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8855 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8856 ; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
8857 ; X86-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
8858 ; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8859 ; X86-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
8860 ; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
8861 ; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8862 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8863 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8864 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8865 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8866 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8867 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8868 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8869 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8870 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8871 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8872 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8873 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8874 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8875 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8876 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8877 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8878 ; X86-NEXT: retl # encoding: [0xc3]
8880 ; X64-LABEL: test_mask_cmp_q_128:
8882 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
8883 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8884 ; X64-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
8885 ; X64-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
8886 ; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8887 ; X64-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
8888 ; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
8889 ; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8890 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8891 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8892 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8893 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8894 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8895 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8896 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8897 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8898 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8899 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8900 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8901 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8902 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8903 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8904 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8905 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8906 ; X64-NEXT: retq # encoding: [0xc3]
8907 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
8908 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8909 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
8910 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8911 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
8912 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8913 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
8914 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8915 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
8916 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8917 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
8918 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8919 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
8920 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8921 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
8922 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8923 ret <8 x i8> %vec7
8924 }
8926 declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
8928 define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
8929 ; CHECK-LABEL: test_ucmp_q_128:
8931 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
8932 ; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
8933 ; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
8934 ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
8935 ; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
8936 ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
8937 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8938 ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8939 ; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8940 ; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8941 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8942 ; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8943 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8944 ; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8945 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8946 ; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8947 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8948 ; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8949 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8950 ; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
8951 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
8952 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
8953 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
8954 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
8955 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
8956 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
8957 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
8958 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
8959 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
8960 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
8961 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
8962 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
8963 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
8964 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
8965 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
8966 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
8967 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
8968 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
8969 ret <8 x i8> %vec7
8970 }
8972 define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
8973 ; X86-LABEL: test_mask_ucmp_q_128:
8975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8976 ; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
8977 ; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
8978 ; X86-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
8979 ; X86-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
8980 ; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
8981 ; X86-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
8982 ; X86-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
8983 ; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
8984 ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
8985 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
8986 ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
8987 ; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
8988 ; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
8989 ; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
8990 ; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
8991 ; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
8992 ; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
8993 ; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
8994 ; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
8995 ; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
8996 ; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
8997 ; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
8998 ; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
8999 ; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
9000 ; X86-NEXT: retl # encoding: [0xc3]
9002 ; X64-LABEL: test_mask_ucmp_q_128:
9004 ; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
9005 ; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
9006 ; X64-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
9007 ; X64-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
9008 ; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
9009 ; X64-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
9010 ; X64-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
9011 ; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
9012 ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
9013 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9014 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
9015 ; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
9016 ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
9017 ; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
9018 ; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
9019 ; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
9020 ; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
9021 ; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
9022 ; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
9023 ; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
9024 ; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
9025 ; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
9026 ; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
9027 ; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
9028 ; X64-NEXT: retq # encoding: [0xc3]
9029 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
9030 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
9031 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
9032 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
9033 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
9034 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
9035 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
9036 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
9037 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
9038 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
9039 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
9040 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
9041 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
9042 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
9043 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
9044 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
9045 ret <8 x i8> %vec7
9046 }
9048 declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
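; broadcastf32x4/broadcasti32x4 replicate the 128-bit source into both halves of a 256-bit destination;
; the masked forms blend with the passthrough operand, and the {z} forms zero the unselected elements.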
9050 declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)
9052 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
9053 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
9055 ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9056 ; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
9057 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9058 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9059 ; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
9060 ; X86-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
9061 ; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
9062 ; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
9063 ; X86-NEXT: retl # encoding: [0xc3]
9065 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
9067 ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9068 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
9069 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9070 ; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
9071 ; X64-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
9072 ; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
9073 ; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
9074 ; X64-NEXT: retq # encoding: [0xc3]
9075 %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
9076 %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
9077 %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
9078 %res4 = fadd <8 x float> %res1, %res2
9079 %res5 = fadd <8 x float> %res3, %res4
9080 ret <8 x float> %res5
9081 }
9083 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) {
9084 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
9086 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9087 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9088 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9089 ; X86-NEXT: vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00]
9090 ; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9091 ; X86-NEXT: retl # encoding: [0xc3]
9093 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
9095 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9096 ; X64-NEXT: vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07]
9097 ; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9098 ; X64-NEXT: retq # encoding: [0xc3]
9099 %x0 = load <4 x float>, <4 x float>* %x0ptr
9100 %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
9101 ret <8 x float> %res
9102 }
9104 declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)
9106 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
9107 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
9109 ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9110 ; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
9111 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9112 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9113 ; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
9114 ; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
9115 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
9116 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
9117 ; X86-NEXT: retl # encoding: [0xc3]
9119 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
9121 ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
9122 ; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
9123 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9124 ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
9125 ; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
9126 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
9127 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
9128 ; X64-NEXT: retq # encoding: [0xc3]
9129 %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
9130 %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
9131 %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
9132 %res4 = add <8 x i32> %res1, %res2
9133 %res5 = add <8 x i32> %res3, %res4
9134 ret <8 x i32> %res5
9135 }
9137 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) {
9138 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
9140 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9141 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9142 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9143 ; X86-NEXT: vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00]
9144 ; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9145 ; X86-NEXT: retl # encoding: [0xc3]
9147 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
9149 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9150 ; X64-NEXT: vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07]
9151 ; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
9152 ; X64-NEXT: retq # encoding: [0xc3]
9153 %x0 = load <4 x i32>, <4 x i32>* %x0ptr
9154 %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
9155 ret <8 x i32> %res
9156 }
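; The pabs tests compute per-element absolute values; each test adds the masked result (blended with %x1 under %x2)
; to the unmasked result.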
9158 declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
9160 define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9161 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128:
9163 ; X86-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
9164 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9165 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9166 ; X86-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
9167 ; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
9168 ; X86-NEXT: retl # encoding: [0xc3]
9170 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_128:
9172 ; X64-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
9173 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9174 ; X64-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
9175 ; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
9176 ; X64-NEXT: retq # encoding: [0xc3]
9177 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9178 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
9179 %res2 = add <2 x i64> %res, %res1
9180 ret <2 x i64> %res2
9181 }
9183 declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
9185 define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9186 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256:
9188 ; X86-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
9189 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9190 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9191 ; X86-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
9192 ; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
9193 ; X86-NEXT: retl # encoding: [0xc3]
9195 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256:
9197 ; X64-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
9198 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9199 ; X64-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
9200 ; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
9201 ; X64-NEXT: retq # encoding: [0xc3]
9202 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9203 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
9204 %res2 = add <4 x i64> %res, %res1
9205 ret <4 x i64> %res2
9206 }
9208 declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
9210 define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9211 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128:
9213 ; X86-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
9214 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9215 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9216 ; X86-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
9217 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
9218 ; X86-NEXT: retl # encoding: [0xc3]
9220 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128:
9222 ; X64-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
9223 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9224 ; X64-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
9225 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
9226 ; X64-NEXT: retq # encoding: [0xc3]
9227 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9228 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
9229 %res2 = add <4 x i32> %res, %res1
9230 ret <4 x i32> %res2
9231 }
9233 declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
9235 define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9236 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256:
9238 ; X86-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
9239 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9240 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9241 ; X86-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
9242 ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
9243 ; X86-NEXT: retl # encoding: [0xc3]
9245 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256:
9247 ; X64-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
9248 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9249 ; X64-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
9250 ; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
9251 ; X64-NEXT: retq # encoding: [0xc3]
9252 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9253 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
9254 %res2 = add <8 x i32> %res, %res1
9255 ret <8 x i32> %res2
9256 }
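; ptestm sets a mask bit where the AND of the corresponding source elements is nonzero; ptestnm sets it where the
; AND is zero. Each test adds the masked and unmasked i8 results.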
9258 declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8)
9260 define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9261 ; X86-LABEL: test_int_x86_avx512_ptestm_d_128:
9263 ; X86-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
9264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9266 ; X86-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
9267 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9268 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9269 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9270 ; X86-NEXT: # kill: def $al killed $al killed $eax
9271 ; X86-NEXT: retl # encoding: [0xc3]
9273 ; X64-LABEL: test_int_x86_avx512_ptestm_d_128:
9275 ; X64-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
9276 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9277 ; X64-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
9278 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9279 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9280 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9281 ; X64-NEXT: # kill: def $al killed $al killed $eax
9282 ; X64-NEXT: retq # encoding: [0xc3]
9283 %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9284 %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
9285 %res2 = add i8 %res, %res1
9286 ret i8 %res2
9287 }
9289 declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)
9291 define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9292 ; X86-LABEL: test_int_x86_avx512_ptestm_d_256:
9294 ; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
9295 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
9296 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
9297 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
9298 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9299 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9300 ; X86-NEXT: retl # encoding: [0xc3]
9302 ; X64-LABEL: test_int_x86_avx512_ptestm_d_256:
9304 ; X64-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
9305 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9306 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
9307 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
9308 ; X64-NEXT: # kill: def $al killed $al killed $eax
9309 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9310 ; X64-NEXT: retq # encoding: [0xc3]
9311 %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9312 %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
9313 %res2 = add i8 %res, %res1
9314 ret i8 %res2
9315 }
9317 declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)
9319 define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9320 ; X86-LABEL: test_int_x86_avx512_ptestm_q_128:
9322 ; X86-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
9323 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9324 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9325 ; X86-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
9326 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9327 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9328 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9329 ; X86-NEXT: # kill: def $al killed $al killed $eax
9330 ; X86-NEXT: retl # encoding: [0xc3]
9332 ; X64-LABEL: test_int_x86_avx512_ptestm_q_128:
9334 ; X64-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
9335 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9336 ; X64-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
9337 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9338 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9339 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9340 ; X64-NEXT: # kill: def $al killed $al killed $eax
9341 ; X64-NEXT: retq # encoding: [0xc3]
9342 %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9343 %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
9344 %res2 = add i8 %res, %res1
9345 ret i8 %res2
9346 }
9348 declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)
9350 define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9351 ; X86-LABEL: test_int_x86_avx512_ptestm_q_256:
9353 ; X86-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
9354 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9355 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9356 ; X86-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
9357 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9358 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9359 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9360 ; X86-NEXT: # kill: def $al killed $al killed $eax
9361 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9362 ; X86-NEXT: retl # encoding: [0xc3]
9364 ; X64-LABEL: test_int_x86_avx512_ptestm_q_256:
9366 ; X64-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
9367 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9368 ; X64-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
9369 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9370 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9371 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9372 ; X64-NEXT: # kill: def $al killed $al killed $eax
9373 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9374 ; X64-NEXT: retq # encoding: [0xc3]
9375 %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9376 %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
9377 %res2 = add i8 %res, %res1
9378 ret i8 %res2
9379 }
9381 declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)
9383 define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
9384 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_128:
9386 ; X86-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
9387 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9388 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9389 ; X86-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
9390 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9391 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9392 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9393 ; X86-NEXT: # kill: def $al killed $al killed $eax
9394 ; X86-NEXT: retl # encoding: [0xc3]
9396 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_128:
9398 ; X64-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
9399 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9400 ; X64-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
9401 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9402 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9403 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9404 ; X64-NEXT: # kill: def $al killed $al killed $eax
9405 ; X64-NEXT: retq # encoding: [0xc3]
9406 %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
9407 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
9408 %res2 = add i8 %res, %res1
9409 ret i8 %res2
9410 }
9412 declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)
9414 define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
9415 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_256:
9417 ; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
9418 ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
9419 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
9420 ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
9421 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9422 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9423 ; X86-NEXT: retl # encoding: [0xc3]
9425 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_256:
9427 ; X64-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
9428 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9429 ; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
9430 ; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
9431 ; X64-NEXT: # kill: def $al killed $al killed $eax
9432 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9433 ; X64-NEXT: retq # encoding: [0xc3]
9434 %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
9435 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
9436 %res2 = add i8 %res, %res1
9437 ret i8 %res2
9438 }
9440 declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)
9442 define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
9443 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_128:
9445 ; X86-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
9446 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9447 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9448 ; X86-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
9449 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9450 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9451 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9452 ; X86-NEXT: # kill: def $al killed $al killed $eax
9453 ; X86-NEXT: retl # encoding: [0xc3]
9455 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_128:
9457 ; X64-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
9458 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9459 ; X64-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
9460 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9461 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9462 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9463 ; X64-NEXT: # kill: def $al killed $al killed $eax
9464 ; X64-NEXT: retq # encoding: [0xc3]
9465 %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
9466 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
9467 %res2 = add i8 %res, %res1
9468 ret i8 %res2
9469 }
9471 declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)
9473 define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
9474 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_256:
9476 ; X86-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
9477 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9478 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9479 ; X86-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
9480 ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9481 ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9482 ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9483 ; X86-NEXT: # kill: def $al killed $al killed $eax
9484 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9485 ; X86-NEXT: retl # encoding: [0xc3]
9487 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_256:
9489 ; X64-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
9490 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9491 ; X64-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
9492 ; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
9493 ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9494 ; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
9495 ; X64-NEXT: # kill: def $al killed $al killed $eax
9496 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9497 ; X64-NEXT: retq # encoding: [0xc3]
9498 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
9499 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
9500 %res2 = add i8 %res, %res1
9501 ret i8 %res2
9502 }
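; The cmp.ps/cmp.pd tests use comparison predicate 2 (LE) and return the resulting vector mask as an i8.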
9504 define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
9505 ; CHECK-LABEL: test_cmpps_256:
9507 ; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
9508 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9509 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9510 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9511 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9512 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
9513 ret i8 %res
9514 }
9515 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
9517 define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
9518 ; CHECK-LABEL: test_cmpps_128:
9520 ; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
9521 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9522 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9523 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9524 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
9525 ret i8 %res
9526 }
9527 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
9529 define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
9530 ; CHECK-LABEL: test_cmppd_256:
9532 ; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
9533 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9534 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9535 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
9536 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9537 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
9538 ret i8 %res
9539 }
9540 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
9542 define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
9543 ; CHECK-LABEL: test_cmppd_128:
9545 ; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
9546 ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
9547 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
9548 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9549 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
9550 ret i8 %res
9551 }
9552 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
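; 128-bit vpmuldq tests: the low 32-bit element of each 64-bit lane is sign-extended and multiplied into a 64-bit product. The rr/rm/rmb suffixes cover register, memory and 64-bit broadcast operands; the k/kz variants add merge- and zero-masking.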
9554 define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
9555 ; CHECK-LABEL: test_mask_mul_epi32_rr_128:
9557 ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
9558 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9559 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9560 ret < 2 x i64> %res
9561 }
9563 define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
9564 ; X86-LABEL: test_mask_mul_epi32_rrk_128:
9566 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9567 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9568 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
9569 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9570 ; X86-NEXT: retl # encoding: [0xc3]
9572 ; X64-LABEL: test_mask_mul_epi32_rrk_128:
9574 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9575 ; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
9576 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9577 ; X64-NEXT: retq # encoding: [0xc3]
9578 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9579 ret < 2 x i64> %res
9580 }
9582 define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
9583 ; X86-LABEL: test_mask_mul_epi32_rrkz_128:
9585 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9586 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9587 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9588 ; X86-NEXT: retl # encoding: [0xc3]
9590 ; X64-LABEL: test_mask_mul_epi32_rrkz_128:
9592 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9593 ; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9594 ; X64-NEXT: retq # encoding: [0xc3]
9595 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9596 ret < 2 x i64> %res
9597 }
9599 define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
9600 ; X86-LABEL: test_mask_mul_epi32_rm_128:
9602 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9603 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00]
9604 ; X86-NEXT: retl # encoding: [0xc3]
9606 ; X64-LABEL: test_mask_mul_epi32_rm_128:
9608 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07]
9609 ; X64-NEXT: retq # encoding: [0xc3]
9610 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9611 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9612 ret < 2 x i64> %res
9613 }
9615 define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9616 ; X86-LABEL: test_mask_mul_epi32_rmk_128:
9618 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9619 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9620 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9621 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x08]
9622 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9623 ; X86-NEXT: retl # encoding: [0xc3]
9625 ; X64-LABEL: test_mask_mul_epi32_rmk_128:
9627 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9628 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
9629 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9630 ; X64-NEXT: retq # encoding: [0xc3]
9631 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9632 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9633 ret < 2 x i64> %res
9634 }
9636 define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
9637 ; X86-LABEL: test_mask_mul_epi32_rmkz_128:
9639 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9640 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9641 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9642 ; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00]
9643 ; X86-NEXT: retl # encoding: [0xc3]
9645 ; X64-LABEL: test_mask_mul_epi32_rmkz_128:
9647 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9648 ; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
9649 ; X64-NEXT: retq # encoding: [0xc3]
9650 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9651 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9652 ret < 2 x i64> %res
9653 }
9655 define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
9656 ; X86-LABEL: test_mask_mul_epi32_rmb_128:
9658 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9659 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
9660 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
9661 ; X86-NEXT: retl # encoding: [0xc3]
9663 ; X64-LABEL: test_mask_mul_epi32_rmb_128:
9665 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
9666 ; X64-NEXT: retq # encoding: [0xc3]
9667 %q = load i64, i64* %ptr_b
9668 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9669 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
9670 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9671 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9672 ret < 2 x i64> %res
9673 }
9675 define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9676 ; X86-LABEL: test_mask_mul_epi32_rmbk_128:
9678 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9679 ; X86-NEXT: vpbroadcastd (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x10]
9680 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9681 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9682 ; X86-NEXT: vpmuldq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xca]
9683 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9684 ; X86-NEXT: retl # encoding: [0xc3]
9686 ; X64-LABEL: test_mask_mul_epi32_rmbk_128:
9688 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9689 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
9690 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9691 ; X64-NEXT: retq # encoding: [0xc3]
9692 %q = load i64, i64* %ptr_b
9693 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9694 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
9695 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9696 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9697 ret < 2 x i64> %res
9698 }
9700 define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
9701 ; X86-LABEL: test_mask_mul_epi32_rmbkz_128:
9703 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9704 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
9705 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9706 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9707 ; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
9708 ; X86-NEXT: retl # encoding: [0xc3]
9710 ; X64-LABEL: test_mask_mul_epi32_rmbkz_128:
9712 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9713 ; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
9714 ; X64-NEXT: retq # encoding: [0xc3]
9715 %q = load i64, i64* %ptr_b
9716 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
9717 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
9718 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
9719 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9720 ret < 2 x i64> %res
9721 }
9723 declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
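; 256-bit vpmuldq tests: the same rr/rm/rmb matrix with ymm operands and a {1to4} embedded broadcast for the rmb forms on x86-64.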
9725 define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
9726 ; CHECK-LABEL: test_mask_mul_epi32_rr_256:
9728 ; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
9729 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9730 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9731 ret < 4 x i64> %res
9732 }
9734 define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
9735 ; X86-LABEL: test_mask_mul_epi32_rrk_256:
9737 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9738 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9739 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
9740 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9741 ; X86-NEXT: retl # encoding: [0xc3]
9743 ; X64-LABEL: test_mask_mul_epi32_rrk_256:
9745 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9746 ; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
9747 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
9748 ; X64-NEXT: retq # encoding: [0xc3]
9749 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9750 ret < 4 x i64> %res
9751 }
9753 define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
9754 ; X86-LABEL: test_mask_mul_epi32_rrkz_256:
9756 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9757 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9758 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9759 ; X86-NEXT: retl # encoding: [0xc3]
9761 ; X64-LABEL: test_mask_mul_epi32_rrkz_256:
9763 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9764 ; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9765 ; X64-NEXT: retq # encoding: [0xc3]
9766 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9767 ret < 4 x i64> %res
9768 }
9770 define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
9771 ; X86-LABEL: test_mask_mul_epi32_rm_256:
9773 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9774 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00]
9775 ; X86-NEXT: retl # encoding: [0xc3]
9777 ; X64-LABEL: test_mask_mul_epi32_rm_256:
9779 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07]
9780 ; X64-NEXT: retq # encoding: [0xc3]
9781 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9782 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9783 ret < 4 x i64> %res
9784 }
9786 define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
9787 ; X86-LABEL: test_mask_mul_epi32_rmk_256:
9789 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9790 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9791 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9792 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08]
9793 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9794 ; X86-NEXT: retl # encoding: [0xc3]
9796 ; X64-LABEL: test_mask_mul_epi32_rmk_256:
9798 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9799 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
9800 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9801 ; X64-NEXT: retq # encoding: [0xc3]
9802 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9803 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9804 ret < 4 x i64> %res
9805 }
9807 define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
9808 ; X86-LABEL: test_mask_mul_epi32_rmkz_256:
9810 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9811 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9812 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9813 ; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00]
9814 ; X86-NEXT: retl # encoding: [0xc3]
9816 ; X64-LABEL: test_mask_mul_epi32_rmkz_256:
9818 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9819 ; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
9820 ; X64-NEXT: retq # encoding: [0xc3]
9821 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
9822 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9823 ret < 4 x i64> %res
9824 }
9826 define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
9827 ; X86-LABEL: test_mask_mul_epi32_rmb_256:
9829 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9830 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
9831 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
9832 ; X86-NEXT: retl # encoding: [0xc3]
9834 ; X64-LABEL: test_mask_mul_epi32_rmb_256:
9836 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
9837 ; X64-NEXT: retq # encoding: [0xc3]
9838 %q = load i64, i64* %ptr_b
9839 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9840 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9841 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9842 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
9843 ret < 4 x i64> %res
9844 }
9846 define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
9847 ; X86-LABEL: test_mask_mul_epi32_rmbk_256:
9849 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9850 ; X86-NEXT: vpbroadcastd (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x10]
9851 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9852 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9853 ; X86-NEXT: vpmuldq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xca]
9854 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9855 ; X86-NEXT: retl # encoding: [0xc3]
9857 ; X64-LABEL: test_mask_mul_epi32_rmbk_256:
9859 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9860 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
9861 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
9862 ; X64-NEXT: retq # encoding: [0xc3]
9863 %q = load i64, i64* %ptr_b
9864 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9865 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9866 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9867 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
9868 ret < 4 x i64> %res
9869 }
9871 define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
9872 ; X86-LABEL: test_mask_mul_epi32_rmbkz_256:
9874 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9875 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
9876 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
9877 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9878 ; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
9879 ; X86-NEXT: retl # encoding: [0xc3]
9881 ; X64-LABEL: test_mask_mul_epi32_rmbkz_256:
9883 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9884 ; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
9885 ; X64-NEXT: retq # encoding: [0xc3]
9886 %q = load i64, i64* %ptr_b
9887 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
9888 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
9889 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
9890 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
9891 ret < 4 x i64> %res
9892 }
9894 declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
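; vpmuludq (unsigned multiply) 128-bit tests, mirroring the signed vpmuldq matrix above.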
9896 define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
9897 ; CHECK-LABEL: test_mask_mul_epu32_rr_128:
9899 ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
9900 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
9901 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9902 ret < 2 x i64> %res
9903 }
9905 define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
9906 ; X86-LABEL: test_mask_mul_epu32_rrk_128:
9908 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9909 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9910 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
9911 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9912 ; X86-NEXT: retl # encoding: [0xc3]
9914 ; X64-LABEL: test_mask_mul_epu32_rrk_128:
9916 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9917 ; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
9918 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
9919 ; X64-NEXT: retq # encoding: [0xc3]
9920 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9921 ret < 2 x i64> %res
9922 }
9924 define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
9925 ; X86-LABEL: test_mask_mul_epu32_rrkz_128:
9927 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
9928 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
9929 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
9930 ; X86-NEXT: retl # encoding: [0xc3]
9932 ; X64-LABEL: test_mask_mul_epu32_rrkz_128:
9934 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
9935 ; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
9936 ; X64-NEXT: retq # encoding: [0xc3]
9937 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9938 ret < 2 x i64> %res
9939 }
9941 define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
9942 ; X86-LABEL: test_mask_mul_epu32_rm_128:
9944 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9945 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x00]
9946 ; X86-NEXT: retl # encoding: [0xc3]
9948 ; X64-LABEL: test_mask_mul_epu32_rm_128:
9950 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07]
9951 ; X64-NEXT: retq # encoding: [0xc3]
9952 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9953 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
9954 ret < 2 x i64> %res
9955 }
9957 define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
9958 ; X86-LABEL: test_mask_mul_epu32_rmk_128:
9960 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9961 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9962 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9963 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08]
9964 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9965 ; X86-NEXT: retl # encoding: [0xc3]
9967 ; X64-LABEL: test_mask_mul_epu32_rmk_128:
9969 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9970 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
9971 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
9972 ; X64-NEXT: retq # encoding: [0xc3]
9973 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9974 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
9975 ret < 2 x i64> %res
9976 }
9978 define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
9979 ; X86-LABEL: test_mask_mul_epu32_rmkz_128:
9981 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
9982 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
9983 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
9984 ; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00]
9985 ; X86-NEXT: retl # encoding: [0xc3]
9987 ; X64-LABEL: test_mask_mul_epu32_rmkz_128:
9989 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
9990 ; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
9991 ; X64-NEXT: retq # encoding: [0xc3]
9992 %b = load < 4 x i32>, < 4 x i32>* %ptr_b
9993 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
9994 ret < 2 x i64> %res
9995 }
9997 define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
9998 ; X86-LABEL: test_mask_mul_epu32_rmb_128:
10000 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10001 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
10002 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
10003 ; X86-NEXT: retl # encoding: [0xc3]
10005 ; X64-LABEL: test_mask_mul_epu32_rmb_128:
10007 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
10008 ; X64-NEXT: retq # encoding: [0xc3]
10009 %q = load i64, i64* %ptr_b
10010 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10011 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
10012 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10013 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
10014 ret < 2 x i64> %res
10015 }
10017 define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
10018 ; X86-LABEL: test_mask_mul_epu32_rmbk_128:
10020 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10021 ; X86-NEXT: vpbroadcastd (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x10]
10022 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10023 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10024 ; X86-NEXT: vpmuludq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xca]
10025 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
10026 ; X86-NEXT: retl # encoding: [0xc3]
10028 ; X64-LABEL: test_mask_mul_epu32_rmbk_128:
10030 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10031 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
10032 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
10033 ; X64-NEXT: retq # encoding: [0xc3]
10034 %q = load i64, i64* %ptr_b
10035 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10036 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
10037 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10038 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
10039 ret < 2 x i64> %res
10040 }
10042 define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
10043 ; X86-LABEL: test_mask_mul_epu32_rmbkz_128:
10045 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10046 ; X86-NEXT: vpbroadcastd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x08]
10047 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10048 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10049 ; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
10050 ; X86-NEXT: retl # encoding: [0xc3]
10052 ; X64-LABEL: test_mask_mul_epu32_rmbkz_128:
10054 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10055 ; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
10056 ; X64-NEXT: retq # encoding: [0xc3]
10057 %q = load i64, i64* %ptr_b
10058 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
10059 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
10060 %b = bitcast < 2 x i64> %b64 to < 4 x i32>
10061 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
10062 ret < 2 x i64> %res
10063 }
10065 declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
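; vpmuludq 256-bit tests, again mirroring the signed 256-bit cases.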
10067 define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
10068 ; CHECK-LABEL: test_mask_mul_epu32_rr_256:
10070 ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
10071 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
10072 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10073 ret < 4 x i64> %res
10074 }
10076 define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
10077 ; X86-LABEL: test_mask_mul_epu32_rrk_256:
10079 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10080 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10081 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
10082 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
10083 ; X86-NEXT: retl # encoding: [0xc3]
10085 ; X64-LABEL: test_mask_mul_epu32_rrk_256:
10087 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10088 ; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
10089 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
10090 ; X64-NEXT: retq # encoding: [0xc3]
10091 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10092 ret < 4 x i64> %res
10093 }
10095 define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
10096 ; X86-LABEL: test_mask_mul_epu32_rrkz_256:
10098 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10099 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10100 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10101 ; X86-NEXT: retl # encoding: [0xc3]
10103 ; X64-LABEL: test_mask_mul_epu32_rrkz_256:
10105 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10106 ; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10107 ; X64-NEXT: retq # encoding: [0xc3]
10108 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10109 ret < 4 x i64> %res
10110 }
10112 define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
10113 ; X86-LABEL: test_mask_mul_epu32_rm_256:
10115 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10116 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00]
10117 ; X86-NEXT: retl # encoding: [0xc3]
10119 ; X64-LABEL: test_mask_mul_epu32_rm_256:
10121 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07]
10122 ; X64-NEXT: retq # encoding: [0xc3]
10123 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10124 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10125 ret < 4 x i64> %res
10126 }
10128 define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
10129 ; X86-LABEL: test_mask_mul_epu32_rmk_256:
10131 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10132 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
10133 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
10134 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08]
10135 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10136 ; X86-NEXT: retl # encoding: [0xc3]
10138 ; X64-LABEL: test_mask_mul_epu32_rmk_256:
10140 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10141 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
10142 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10143 ; X64-NEXT: retq # encoding: [0xc3]
10144 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10145 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10146 ret < 4 x i64> %res
10147 }
10149 define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
10150 ; X86-LABEL: test_mask_mul_epu32_rmkz_256:
10152 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
10154 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
10155 ; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00]
10156 ; X86-NEXT: retl # encoding: [0xc3]
10158 ; X64-LABEL: test_mask_mul_epu32_rmkz_256:
10160 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10161 ; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
10162 ; X64-NEXT: retq # encoding: [0xc3]
10163 %b = load < 8 x i32>, < 8 x i32>* %ptr_b
10164 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10165 ret < 4 x i64> %res
10166 }
10168 define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
10169 ; X86-LABEL: test_mask_mul_epu32_rmb_256:
10171 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10172 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
10173 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
10174 ; X86-NEXT: retl # encoding: [0xc3]
10176 ; X64-LABEL: test_mask_mul_epu32_rmb_256:
10178 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
10179 ; X64-NEXT: retq # encoding: [0xc3]
10180 %q = load i64, i64* %ptr_b
10181 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10182 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10183 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10184 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
10185 ret < 4 x i64> %res
10186 }
10188 define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
10189 ; X86-LABEL: test_mask_mul_epu32_rmbk_256:
10191 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10192 ; X86-NEXT: vpbroadcastd (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x10]
10193 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10194 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10195 ; X86-NEXT: vpmuludq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xca]
10196 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10197 ; X86-NEXT: retl # encoding: [0xc3]
10199 ; X64-LABEL: test_mask_mul_epu32_rmbk_256:
10201 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10202 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
10203 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
10204 ; X64-NEXT: retq # encoding: [0xc3]
10205 %q = load i64, i64* %ptr_b
10206 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10207 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10208 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10209 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
10210 ret < 4 x i64> %res
10211 }
10213 define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
10214 ; X86-LABEL: test_mask_mul_epu32_rmbkz_256:
10216 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
10217 ; X86-NEXT: vpbroadcastd (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x08]
10218 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
10219 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10220 ; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
10221 ; X86-NEXT: retl # encoding: [0xc3]
10223 ; X64-LABEL: test_mask_mul_epu32_rmbkz_256:
10225 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
10226 ; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
10227 ; X64-NEXT: retq # encoding: [0xc3]
10228 %q = load i64, i64* %ptr_b
10229 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
10230 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
10231 %b = bitcast < 4 x i64> %b64 to < 8 x i32>
10232 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
10233 ret < 4 x i64> %res
10234 }
10236 declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
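; Masked conversion intrinsics: cvtdq2ps, cvtpd2dq, cvtpd2ps, cvtps2pd, cvttpd2dq and cvttps2dq. Each test adds (or fadds) the masked and unmasked results so both encodings are checked.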
10238 declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
10240 define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
10241 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
10243 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
10244 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10245 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10246 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
10247 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10248 ; X86-NEXT: retl # encoding: [0xc3]
10250 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
10252 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
10253 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10254 ; X64-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
10255 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10256 ; X64-NEXT: retq # encoding: [0xc3]
10257 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
10258 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
10259 %res2 = fadd <4 x float> %res, %res1
10260 ret <4 x float> %res2
10261 }
10263 declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)
10265 define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
10266 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
10268 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
10269 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10270 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10271 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
10272 ; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10273 ; X86-NEXT: retl # encoding: [0xc3]
10275 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
10277 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
10278 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10279 ; X64-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
10280 ; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10281 ; X64-NEXT: retq # encoding: [0xc3]
10282 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
10283 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
10284 %res2 = fadd <8 x float> %res, %res1
10285 ret <8 x float> %res2
10286 }
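; The pd2dq and pd2ps conversions narrow a 256-bit source to a 128-bit result, so their 256-bit tests also expect vzeroupper before returning.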
10288 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
10290 define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
10291 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
10293 ; X86-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
10294 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10295 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10296 ; X86-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
10297 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10298 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10299 ; X86-NEXT: retl # encoding: [0xc3]
10301 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
10303 ; X64-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
10304 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10305 ; X64-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
10306 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10307 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10308 ; X64-NEXT: retq # encoding: [0xc3]
10309 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
10310 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
10311 %res2 = add <4 x i32> %res, %res1
10312 ret <4 x i32> %res2
10313 }
10315 declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)
10317 define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
10318 ; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
10320 ; X86-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
10321 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10322 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10323 ; X86-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
10324 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10325 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10326 ; X86-NEXT: retl # encoding: [0xc3]
10328 ; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
10330 ; X64-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
10331 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10332 ; X64-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
10333 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10334 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10335 ; X64-NEXT: retq # encoding: [0xc3]
10336 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
10337 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
10338 %res2 = fadd <4 x float> %res, %res1
10339 ret <4 x float> %res2
10340 }
10342 declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)
10344 define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
10345 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
10347 ; X86-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
10348 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10349 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10350 ; X86-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
10351 ; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
10352 ; X86-NEXT: retl # encoding: [0xc3]
10354 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
10356 ; X64-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
10357 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10358 ; X64-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
10359 ; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
10360 ; X64-NEXT: retq # encoding: [0xc3]
10361 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
10362 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
10363 %res2 = fadd <4 x double> %res, %res1
10364 ret <4 x double> %res2
10365 }
10367 declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)
10369 define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
10370 ; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
10372 ; X86-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
10373 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10374 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10375 ; X86-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
10376 ; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
10377 ; X86-NEXT: retl # encoding: [0xc3]
10379 ; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
10381 ; X64-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
10382 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10383 ; X64-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
10384 ; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
10385 ; X64-NEXT: retq # encoding: [0xc3]
10386 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
10387 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
10388 %res2 = fadd <2 x double> %res, %res1
10389 ret <2 x double> %res2
10390 }
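; The cvtt* forms below truncate toward zero instead of using the current MXCSR rounding mode.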
10392 declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)
10394 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
10395 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
10397 ; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
10398 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10399 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10400 ; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
10401 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10402 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10403 ; X86-NEXT: retl # encoding: [0xc3]
10405 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
10407 ; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
10408 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10409 ; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
10410 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10411 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
10412 ; X64-NEXT: retq # encoding: [0xc3]
10413 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
10414 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
10415 %res2 = add <4 x i32> %res, %res1
10416 ret <4 x i32> %res2
10417 }
10419 declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)
10421 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
10422 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
10424 ; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
10425 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10426 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10427 ; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
10428 ; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10429 ; X86-NEXT: retl # encoding: [0xc3]
10431 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
10433 ; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
10434 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10435 ; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
10436 ; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
10437 ; X64-NEXT: retq # encoding: [0xc3]
10438 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
10439 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
10440 %res2 = add <4 x i32> %res, %res1
10441 ret <4 x i32> %res2
10442 }
10444 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)
10446 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
10447 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
10449 ; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
10450 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10451 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10452 ; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
10453 ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
10454 ; X86-NEXT: retl # encoding: [0xc3]
10456 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
10458 ; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
10459 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10460 ; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
10461 ; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
10462 ; X64-NEXT: retq # encoding: [0xc3]
10463 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
10464 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
10465 %res2 = add <8 x i32> %res, %res1
10466 ret <8 x i32> %res2
10467 }
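; permvar tests: vpermps/vpermd/vpermpd/vpermq permute the elements of %x0 using the indices in %x1; each test sums the merge-masked, zero-masked and unmasked results.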
10469 declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
10471 define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
10472 ; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
10474 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
10475 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10476 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10477 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
10478 ; X86-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
10479 ; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
10480 ; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
10481 ; X86-NEXT: retl # encoding: [0xc3]
10483 ; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
10485 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
10486 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10487 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
10488 ; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
10489 ; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
10490 ; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
10491 ; X64-NEXT: retq # encoding: [0xc3]
10492 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
10493 %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
10494 %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
10495 %res3 = fadd <8 x float> %res, %res1
10496 %res4 = fadd <8 x float> %res3, %res2
10497 ret <8 x float> %res4
10498 }
10500 declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10502 define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10503 ; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256:
10505 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
10506 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10507 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10508 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
10509 ; X86-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
10510 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10511 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
10512 ; X86-NEXT: retl # encoding: [0xc3]
10514 ; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256:
10516 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
10517 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10518 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
10519 ; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
10520 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10521 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
10522 ; X64-NEXT: retq # encoding: [0xc3]
10523 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10524 %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
10525 %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10526 %res3 = add <8 x i32> %res, %res1
10527 %res4 = add <8 x i32> %res3, %res2
10528 ret <8 x i32> %res4
10529 }
10531 declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
10533 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
10534 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
10536 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
10537 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10538 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10539 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
10540 ; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
10541 ; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
10542 ; X86-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
10543 ; X86-NEXT: retl # encoding: [0xc3]
10545 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
10547 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
10548 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10549 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
10550 ; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
10551 ; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
10552 ; X64-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
10553 ; X64-NEXT: retq # encoding: [0xc3]
10554 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
10555 %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
10556 %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
10557 %res3 = fadd <4 x double> %res, %res1
10558 %res4 = fadd <4 x double> %res3, %res2
10559 ret <4 x double> %res4
10560 }
10562 declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
10564 define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
10565 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
10567 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
10568 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10569 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10570 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
10571 ; X86-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
10572 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10573 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
10574 ; X86-NEXT: retl # encoding: [0xc3]
10576 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
10578 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
10579 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10580 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
10581 ; X64-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
10582 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10583 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
10584 ; X64-NEXT: retq # encoding: [0xc3]
10585 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
10586 %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
10587 %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
10588 %res3 = add <4 x i64> %res, %res1
10589 %res4 = add <4 x i64> %res3, %res2
10590 ret <4 x i64> %res4
10593 declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
10595 define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
10596 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
10598 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10599 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10600 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10601 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10602 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
10603 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10604 ; X86-NEXT: retl # encoding: [0xc3]
10606 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
10608 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10609 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10610 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10611 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
10612 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10613 ; X64-NEXT: retq # encoding: [0xc3]
10614 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
10615 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
10616 %res2 = add <4 x i32> %res, %res1
10617 ret <4 x i32> %res2
10620 declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
10622 define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
10623 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
10625 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10626 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10627 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10628 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10629 ; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
10630 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10631 ; X86-NEXT: retl # encoding: [0xc3]
10633 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
10635 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10636 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
10637 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10638 ; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
10639 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
10640 ; X64-NEXT: retq # encoding: [0xc3]
10641 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
10642 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
10643 %res2 = add <4 x i32> %res, %res1
10644 ret <4 x i32> %res2
10647 declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
10649 define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
10650 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
10652 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10653 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10654 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10655 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10656 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
10657 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10658 ; X86-NEXT: retl # encoding: [0xc3]
10660 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
10662 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10663 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10664 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10665 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
10666 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10667 ; X64-NEXT: retq # encoding: [0xc3]
10668 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
10669 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
10670 %res2 = add <8 x i32> %res, %res1
10671 ret <8 x i32> %res2
10674 declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
10676 define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
10677 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
10679 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10680 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10681 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10682 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10683 ; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
10684 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10685 ; X86-NEXT: retl # encoding: [0xc3]
10687 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
10689 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10690 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
10691 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10692 ; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
10693 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
10694 ; X64-NEXT: retq # encoding: [0xc3]
10695 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
10696 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
10697 %res2 = add <8 x i32> %res, %res1
10698 ret <8 x i32> %res2
10701 declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
10703 define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
10704 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
10706 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10707 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10708 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10709 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10710 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
10711 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10712 ; X86-NEXT: retl # encoding: [0xc3]
10714 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
10716 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10717 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10718 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10719 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
10720 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10721 ; X64-NEXT: retq # encoding: [0xc3]
10722 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
10723 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
10724 %res2 = add <2 x i64> %res, %res1
10725 ret <2 x i64> %res2
10728 declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
10730 define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
10731 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
10733 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10734 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10735 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10736 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10737 ; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
10738 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10739 ; X86-NEXT: retl # encoding: [0xc3]
10741 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
10743 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10744 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
10745 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10746 ; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
10747 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
10748 ; X64-NEXT: retq # encoding: [0xc3]
10749 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
10750 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
10751 %res2 = add <2 x i64> %res, %res1
10752 ret <2 x i64> %res2
10755 declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
10757 define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
10758 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
10760 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10761 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10762 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10763 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10764 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
10765 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10766 ; X86-NEXT: retl # encoding: [0xc3]
10768 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
10770 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10771 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10772 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10773 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
10774 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10775 ; X64-NEXT: retq # encoding: [0xc3]
10776 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
10777 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
10778 %res2 = add <4 x i64> %res, %res1
10779 ret <4 x i64> %res2
10782 declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
10784 define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
10785 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
10787 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10788 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10789 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10790 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10791 ; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
10792 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10793 ; X86-NEXT: retl # encoding: [0xc3]
10795 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
10797 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10798 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
10799 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10800 ; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
10801 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
10802 ; X64-NEXT: retq # encoding: [0xc3]
10803 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
10804 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
10805 %res2 = add <4 x i64> %res, %res1
10806 ret <4 x i64> %res2
10809 declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
10811 define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
10812 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
10814 ; X86-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
10815 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10816 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10817 ; X86-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
10818 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10819 ; X86-NEXT: retl # encoding: [0xc3]
10821 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
10823 ; X64-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
10824 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10825 ; X64-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
10826 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
10827 ; X64-NEXT: retq # encoding: [0xc3]
10828 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
10829 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
10830 %res2 = fadd <4 x float> %res, %res1
10831 ret <4 x float> %res2
10834 declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
10836 define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
10837 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
10839 ; X86-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
10840 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10841 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10842 ; X86-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
10843 ; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10844 ; X86-NEXT: retl # encoding: [0xc3]
10846 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
10848 ; X64-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
10849 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10850 ; X64-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
10851 ; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
10852 ; X64-NEXT: retq # encoding: [0xc3]
10853 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
10854 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
10855 %res2 = fadd <8 x float> %res, %res1
10856 ret <8 x float> %res2
10859 declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10861 define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10862 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
10864 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10865 ; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
10866 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10867 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10868 ; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
10869 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10870 ; X86-NEXT: retl # encoding: [0xc3]
10872 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
10874 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
10875 ; X64-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
10876 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10877 ; X64-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
10878 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10879 ; X64-NEXT: retq # encoding: [0xc3]
10880 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10881 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10882 %res2 = add <4 x i32> %res, %res1
10883 ret <4 x i32> %res2
10886 declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10888 define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10889 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
10891 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10892 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10893 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10894 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10895 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
10896 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10897 ; X86-NEXT: retl # encoding: [0xc3]
10899 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
10901 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10902 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10904 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
10905 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10906 ; X64-NEXT: retq # encoding: [0xc3]
10907 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10908 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10909 %res2 = add <4 x i32> %res, %res1
10910 ret <4 x i32> %res2
10913 declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
10915 define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
10916 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
10918 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10919 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10920 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10921 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10922 ; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
10923 ; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10924 ; X86-NEXT: retl # encoding: [0xc3]
10926 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
10928 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
10929 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
10930 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10931 ; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
10932 ; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
10933 ; X64-NEXT: retq # encoding: [0xc3]
10934 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10935 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
10936 %res2 = add <4 x i32> %res, %res1
10937 ret <4 x i32> %res2
10940 declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10942 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10943 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
10945 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10946 ; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
10947 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10948 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10949 ; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
10950 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10951 ; X86-NEXT: retl # encoding: [0xc3]
10953 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
10955 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
10956 ; X64-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
10957 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10958 ; X64-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
10959 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10960 ; X64-NEXT: retq # encoding: [0xc3]
10961 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10962 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10963 %res2 = add <8 x i32> %res, %res1
10964 ret <8 x i32> %res2
10967 declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10969 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10970 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
10972 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
10973 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
10974 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
10975 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
10976 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
10977 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10978 ; X86-NEXT: retl # encoding: [0xc3]
10980 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
10982 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
10983 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
10984 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
10985 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
10986 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
10987 ; X64-NEXT: retq # encoding: [0xc3]
10988 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10989 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
10990 %res2 = add <8 x i32> %res, %res1
10991 ret <8 x i32> %res2
10994 declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
10996 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
10997 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
10999 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11000 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
11001 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11002 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11003 ; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
11004 ; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
11005 ; X86-NEXT: retl # encoding: [0xc3]
11007 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
11009 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11010 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
11011 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11012 ; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
11013 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
11014 ; X64-NEXT: retq # encoding: [0xc3]
11015 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
11016 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
11017 %res2 = add <8 x i32> %res, %res1
11018 ret <8 x i32> %res2
11021 declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
11023 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
11024 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
11026 ; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
11027 ; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
11028 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11029 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11030 ; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
11031 ; X86-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
11032 ; X86-NEXT: retl # encoding: [0xc3]
11034 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
11036 ; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
11037 ; X64-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
11038 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11039 ; X64-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
11040 ; X64-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
11041 ; X64-NEXT: retq # encoding: [0xc3]
11042 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
11043 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
11044 %res2 = fadd <2 x double> %res, %res1
11045 ret <2 x double> %res2
11048 declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
11050 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
11051 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
11053 ; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
11054 ; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
11055 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11056 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11057 ; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
11058 ; X86-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
11059 ; X86-NEXT: retl # encoding: [0xc3]
11061 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
11063 ; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
11064 ; X64-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
11065 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11066 ; X64-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
11067 ; X64-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
11068 ; X64-NEXT: retq # encoding: [0xc3]
11069 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
11070 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
11071 %res2 = fadd <4 x double> %res, %res1
11072 ret <4 x double> %res2
11075 declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
11077 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
11078 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
11080 ; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
11081 ; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
11082 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11083 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11084 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11085 ; X86-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
11086 ; X86-NEXT: retl # encoding: [0xc3]
11088 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
11090 ; X64-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
11091 ; X64-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
11092 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11093 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11094 ; X64-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
11095 ; X64-NEXT: retq # encoding: [0xc3]
11096 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
11097 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
11098 %res2 = fadd <4 x float> %res, %res1
11099 ret <4 x float> %res2
11102 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
11103 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
11105 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11106 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11107 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11108 ; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
11109 ; X86-NEXT: retl # encoding: [0xc3]
11111 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
11113 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11114 ; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
11115 ; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
11116 ; X64-NEXT: retq # encoding: [0xc3]
11117 %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
11118 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
11119 ret <4 x float> %res
11122 declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
11124 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
11125 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
11127 ; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
11128 ; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
11129 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11130 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11131 ; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
11132 ; X86-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
11133 ; X86-NEXT: retl # encoding: [0xc3]
11135 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
11137 ; X64-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
11138 ; X64-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
11139 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11140 ; X64-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
11141 ; X64-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
11142 ; X64-NEXT: retq # encoding: [0xc3]
11143 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
11144 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
11145 %res2 = fadd <8 x float> %res, %res1
11146 ret <8 x float> %res2
11149 declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11151 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11152 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
11154 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
11155 ; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
11156 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11157 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11158 ; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
11159 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11160 ; X86-NEXT: retl # encoding: [0xc3]
11162 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
11164 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
11165 ; X64-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
11166 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11167 ; X64-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
11168 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11169 ; X64-NEXT: retq # encoding: [0xc3]
11170 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11171 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11172 %res2 = add <2 x i64> %res, %res1
11173 ret <2 x i64> %res2
11176 declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11178 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11179 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
11181 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11182 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11183 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11184 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11185 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
11186 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11187 ; X86-NEXT: retl # encoding: [0xc3]
11189 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
11191 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11192 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11193 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11194 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
11195 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11196 ; X64-NEXT: retq # encoding: [0xc3]
11197 %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11198 %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11199 %res2 = add <2 x i64> %res, %res1
11200 ret <2 x i64> %res2
11203 declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
11205 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
11206 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
11208 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11209 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11210 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11211 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11212 ; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
11213 ; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11214 ; X86-NEXT: retl # encoding: [0xc3]
11216 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
11218 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
11219 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
11220 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11221 ; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
11222 ; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
11223 ; X64-NEXT: retq # encoding: [0xc3]
11224 %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
11225 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
11226 %res2 = add <2 x i64> %res, %res1
11227 ret <2 x i64> %res2
11230 declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11232 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11233 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
11235 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
11236 ; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
11237 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11238 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11239 ; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
11240 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11241 ; X86-NEXT: retl # encoding: [0xc3]
11243 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
11245 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
11246 ; X64-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
11247 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11248 ; X64-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
11249 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11250 ; X64-NEXT: retq # encoding: [0xc3]
11251 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11252 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11253 %res2 = add <4 x i64> %res, %res1
11254 ret <4 x i64> %res2
11257 declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11259 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11260 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
11262 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11263 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11266 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
11267 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11268 ; X86-NEXT: retl # encoding: [0xc3]
11270 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
11272 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11273 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11275 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
11276 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11277 ; X64-NEXT: retq # encoding: [0xc3]
11278 %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11279 %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11280 %res2 = add <4 x i64> %res, %res1
11281 ret <4 x i64> %res2
11284 declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
11286 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
11287 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
11289 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11290 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11291 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
11292 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
11293 ; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
11294 ; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11295 ; X86-NEXT: retl # encoding: [0xc3]
11297 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
11299 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
11300 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
11301 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
11302 ; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
11303 ; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
11304 ; X64-NEXT: retq # encoding: [0xc3]
11305 %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
11306 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
11307 %res2 = add <4 x i64> %res, %res1
11308 ret <4 x i64> %res2
11311 define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
11312 ; X86-LABEL: test_mask_compress_store_pd_128:
11314 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11315 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11316 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11317 ; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
11318 ; X86-NEXT: retl # encoding: [0xc3]
11320 ; X64-LABEL: test_mask_compress_store_pd_128:
11322 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11323 ; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
11324 ; X64-NEXT: retq # encoding: [0xc3]
11325 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11329 declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11331 define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
11332 ; X86-LABEL: test_compress_store_pd_128:
11334 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11335 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11336 ; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
11337 ; X86-NEXT: retl # encoding: [0xc3]
11339 ; X64-LABEL: test_compress_store_pd_128:
11341 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11342 ; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
11343 ; X64-NEXT: retq # encoding: [0xc3]
11344 call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
11348 define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
11349 ; X86-LABEL: test_mask_compress_store_ps_128:
11351 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11352 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11353 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11354 ; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
11355 ; X86-NEXT: retl # encoding: [0xc3]
11357 ; X64-LABEL: test_mask_compress_store_ps_128:
11359 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11360 ; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
11361 ; X64-NEXT: retq # encoding: [0xc3]
11362 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11366 declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11368 define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
11369 ; X86-LABEL: test_compress_store_ps_128:
11371 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11372 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11373 ; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
11374 ; X86-NEXT: retl # encoding: [0xc3]
11376 ; X64-LABEL: test_compress_store_ps_128:
11378 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11379 ; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
11380 ; X64-NEXT: retq # encoding: [0xc3]
11381 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
11385 define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
11386 ; X86-LABEL: test_mask_compress_store_q_128:
11388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11389 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11390 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11391 ; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
11392 ; X86-NEXT: retl # encoding: [0xc3]
11394 ; X64-LABEL: test_mask_compress_store_q_128:
11396 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11397 ; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
11398 ; X64-NEXT: retq # encoding: [0xc3]
11399 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11403 declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11405 define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
11406 ; X86-LABEL: test_compress_store_q_128:
11408 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11409 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11410 ; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
11411 ; X86-NEXT: retl # encoding: [0xc3]
11413 ; X64-LABEL: test_compress_store_q_128:
11415 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11416 ; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
11417 ; X64-NEXT: retq # encoding: [0xc3]
11418 call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
11422 define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
11423 ; X86-LABEL: test_mask_compress_store_d_128:
11425 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11426 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11427 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11428 ; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
11429 ; X86-NEXT: retl # encoding: [0xc3]
11431 ; X64-LABEL: test_mask_compress_store_d_128:
11433 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11434 ; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
11435 ; X64-NEXT: retq # encoding: [0xc3]
11436 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11440 declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11442 define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
11443 ; X86-LABEL: test_compress_store_d_128:
11445 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11446 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11447 ; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
11448 ; X86-NEXT: retl # encoding: [0xc3]
11450 ; X64-LABEL: test_compress_store_d_128:
11452 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11453 ; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
11454 ; X64-NEXT: retq # encoding: [0xc3]
11455 call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
11459 define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
11460 ; X86-LABEL: test_mask_expand_load_pd_128:
11462 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11463 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11464 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11465 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
11466 ; X86-NEXT: retl # encoding: [0xc3]
11468 ; X64-LABEL: test_mask_expand_load_pd_128:
11470 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11471 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
11472 ; X64-NEXT: retq # encoding: [0xc3]
11473 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11474 ret <2 x double> %res
11477 define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
11478 ; X86-LABEL: test_maskz_expand_load_pd_128:
11480 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11481 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11482 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11483 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
11484 ; X86-NEXT: retl # encoding: [0xc3]
11486 ; X64-LABEL: test_maskz_expand_load_pd_128:
11488 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11489 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
11490 ; X64-NEXT: retq # encoding: [0xc3]
11491 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
11492 ret <2 x double> %res
11493 }
11495 declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
11497 define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
11498 ; X86-LABEL: test_expand_load_pd_128:
11500 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11501 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11502 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
11503 ; X86-NEXT: retl # encoding: [0xc3]
11505 ; X64-LABEL: test_expand_load_pd_128:
11507 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11508 ; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
11509 ; X64-NEXT: retq # encoding: [0xc3]
11510 %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
11511 ret <2 x double> %res
11512 }
11514 define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
11515 ; X86-LABEL: test_mask_expand_load_ps_128:
11517 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11518 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11519 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11520 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
11521 ; X86-NEXT: retl # encoding: [0xc3]
11523 ; X64-LABEL: test_mask_expand_load_ps_128:
11525 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11526 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
11527 ; X64-NEXT: retq # encoding: [0xc3]
11528 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11529 ret <4 x float> %res
11530 }
11532 define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
11533 ; X86-LABEL: test_maskz_expand_load_ps_128:
11535 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11536 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11537 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11538 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
11539 ; X86-NEXT: retl # encoding: [0xc3]
11541 ; X64-LABEL: test_maskz_expand_load_ps_128:
11543 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11544 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
11545 ; X64-NEXT: retq # encoding: [0xc3]
11546 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
11547 ret <4 x float> %res
11548 }
11550 declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
11552 define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
11553 ; X86-LABEL: test_expand_load_ps_128:
11555 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11556 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11557 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
11558 ; X86-NEXT: retl # encoding: [0xc3]
11560 ; X64-LABEL: test_expand_load_ps_128:
11562 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11563 ; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
11564 ; X64-NEXT: retq # encoding: [0xc3]
11565 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
11566 ret <4 x float> %res
11567 }
11569 define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
11570 ; X86-LABEL: test_mask_expand_load_q_128:
11572 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11573 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11574 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11575 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
11576 ; X86-NEXT: retl # encoding: [0xc3]
11578 ; X64-LABEL: test_mask_expand_load_q_128:
11580 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11581 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
11582 ; X64-NEXT: retq # encoding: [0xc3]
11583 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11584 ret <2 x i64> %res
11585 }
11587 define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
11588 ; X86-LABEL: test_maskz_expand_load_q_128:
11590 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11591 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11592 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11593 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
11594 ; X86-NEXT: retl # encoding: [0xc3]
11596 ; X64-LABEL: test_maskz_expand_load_q_128:
11598 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11599 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
11600 ; X64-NEXT: retq # encoding: [0xc3]
11601 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
11602 ret <2 x i64> %res
11603 }
11605 declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
11607 define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
11608 ; X86-LABEL: test_expand_load_q_128:
11610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11611 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11612 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
11613 ; X86-NEXT: retl # encoding: [0xc3]
11615 ; X64-LABEL: test_expand_load_q_128:
11617 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11618 ; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
11619 ; X64-NEXT: retq # encoding: [0xc3]
11620 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
11621 ret <2 x i64> %res
11622 }
11624 define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
11625 ; X86-LABEL: test_mask_expand_load_d_128:
11627 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11628 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11629 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11630 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
11631 ; X86-NEXT: retl # encoding: [0xc3]
11633 ; X64-LABEL: test_mask_expand_load_d_128:
11635 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11636 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
11637 ; X64-NEXT: retq # encoding: [0xc3]
11638 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11639 ret <4 x i32> %res
11640 }
11642 define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
11643 ; X86-LABEL: test_maskz_expand_load_d_128:
11645 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11646 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11647 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11648 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
11649 ; X86-NEXT: retl # encoding: [0xc3]
11651 ; X64-LABEL: test_maskz_expand_load_d_128:
11653 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11654 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
11655 ; X64-NEXT: retq # encoding: [0xc3]
11656 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
11657 ret <4 x i32> %res
11658 }
11660 declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
11662 define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
11663 ; X86-LABEL: test_expand_load_d_128:
11665 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11666 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11667 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
11668 ; X86-NEXT: retl # encoding: [0xc3]
11670 ; X64-LABEL: test_expand_load_d_128:
11672 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11673 ; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
11674 ; X64-NEXT: retq # encoding: [0xc3]
11675 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
11676 ret <4 x i32> %res
11677 }
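; 256-bit compress-store tests: vcompresspd/vcompressps/vpcompressq/vpcompressd from %ymm0;
; each test ends with vzeroupper before returning.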
11679 define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
11680 ; X86-LABEL: test_mask_compress_store_pd_256:
11682 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11683 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11684 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11685 ; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
11686 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11687 ; X86-NEXT: retl # encoding: [0xc3]
11689 ; X64-LABEL: test_mask_compress_store_pd_256:
11691 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11692 ; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
11693 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11694 ; X64-NEXT: retq # encoding: [0xc3]
11695 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11696 ret void
11697 }
11699 declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11701 define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
11702 ; X86-LABEL: test_compress_store_pd_256:
11704 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11705 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11706 ; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
11707 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11708 ; X86-NEXT: retl # encoding: [0xc3]
11710 ; X64-LABEL: test_compress_store_pd_256:
11712 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11713 ; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
11714 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11715 ; X64-NEXT: retq # encoding: [0xc3]
11716 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
11717 ret void
11718 }
11720 define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
11721 ; X86-LABEL: test_mask_compress_store_ps_256:
11723 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11724 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11725 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11726 ; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
11727 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11728 ; X86-NEXT: retl # encoding: [0xc3]
11730 ; X64-LABEL: test_mask_compress_store_ps_256:
11732 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11733 ; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
11734 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11735 ; X64-NEXT: retq # encoding: [0xc3]
11736 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11737 ret void
11738 }
11740 declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11742 define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
11743 ; X86-LABEL: test_compress_store_ps_256:
11745 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11746 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11747 ; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
11748 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11749 ; X86-NEXT: retl # encoding: [0xc3]
11751 ; X64-LABEL: test_compress_store_ps_256:
11753 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11754 ; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
11755 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11756 ; X64-NEXT: retq # encoding: [0xc3]
11757 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
11758 ret void
11759 }
11761 define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
11762 ; X86-LABEL: test_mask_compress_store_q_256:
11764 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11765 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11766 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11767 ; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
11768 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11769 ; X86-NEXT: retl # encoding: [0xc3]
11771 ; X64-LABEL: test_mask_compress_store_q_256:
11773 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11774 ; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
11775 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11776 ; X64-NEXT: retq # encoding: [0xc3]
11777 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11778 ret void
11779 }
11781 declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11783 define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
11784 ; X86-LABEL: test_compress_store_q_256:
11786 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11787 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11788 ; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
11789 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11790 ; X86-NEXT: retl # encoding: [0xc3]
11792 ; X64-LABEL: test_compress_store_q_256:
11794 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11795 ; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
11796 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11797 ; X64-NEXT: retq # encoding: [0xc3]
11798 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
11799 ret void
11800 }
11802 define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
11803 ; X86-LABEL: test_mask_compress_store_d_256:
11805 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11807 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11808 ; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
11809 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11810 ; X86-NEXT: retl # encoding: [0xc3]
11812 ; X64-LABEL: test_mask_compress_store_d_256:
11814 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11815 ; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
11816 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11817 ; X64-NEXT: retq # encoding: [0xc3]
11818 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
11819 ret void
11820 }
11822 declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
11824 define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
11825 ; X86-LABEL: test_compress_store_d_256:
11827 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11828 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11829 ; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
11830 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11831 ; X86-NEXT: retl # encoding: [0xc3]
11833 ; X64-LABEL: test_compress_store_d_256:
11835 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11836 ; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
11837 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
11838 ; X64-NEXT: retq # encoding: [0xc3]
11839 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
11840 ret void
11841 }
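; 256-bit expand-load tests: vexpandpd/vexpandps/vpexpandq/vpexpandd loading into %ymm0 under
; merge, zero, and all-ones masks.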
11843 define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
11844 ; X86-LABEL: test_mask_expand_load_pd_256:
11846 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11847 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11848 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11849 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
11850 ; X86-NEXT: retl # encoding: [0xc3]
11852 ; X64-LABEL: test_mask_expand_load_pd_256:
11854 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11855 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
11856 ; X64-NEXT: retq # encoding: [0xc3]
11857 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11858 ret <4 x double> %res
11859 }
11861 define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
11862 ; X86-LABEL: test_maskz_expand_load_pd_256:
11864 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11865 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11866 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11867 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
11868 ; X86-NEXT: retl # encoding: [0xc3]
11870 ; X64-LABEL: test_maskz_expand_load_pd_256:
11872 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11873 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
11874 ; X64-NEXT: retq # encoding: [0xc3]
11875 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
11876 ret <4 x double> %res
11877 }
11879 declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
11881 define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
11882 ; X86-LABEL: test_expand_load_pd_256:
11884 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11885 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11886 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
11887 ; X86-NEXT: retl # encoding: [0xc3]
11889 ; X64-LABEL: test_expand_load_pd_256:
11891 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11892 ; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
11893 ; X64-NEXT: retq # encoding: [0xc3]
11894 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
11895 ret <4 x double> %res
11896 }
11898 define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
11899 ; X86-LABEL: test_mask_expand_load_ps_256:
11901 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11902 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11903 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11904 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
11905 ; X86-NEXT: retl # encoding: [0xc3]
11907 ; X64-LABEL: test_mask_expand_load_ps_256:
11909 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11910 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
11911 ; X64-NEXT: retq # encoding: [0xc3]
11912 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11913 ret <8 x float> %res
11914 }
11916 define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
11917 ; X86-LABEL: test_maskz_expand_load_ps_256:
11919 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11920 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11921 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11922 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
11923 ; X86-NEXT: retl # encoding: [0xc3]
11925 ; X64-LABEL: test_maskz_expand_load_ps_256:
11927 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11928 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
11929 ; X64-NEXT: retq # encoding: [0xc3]
11930 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
11931 ret <8 x float> %res
11932 }
11934 declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
11936 define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
11937 ; X86-LABEL: test_expand_load_ps_256:
11939 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11940 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11941 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
11942 ; X86-NEXT: retl # encoding: [0xc3]
11944 ; X64-LABEL: test_expand_load_ps_256:
11946 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11947 ; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
11948 ; X64-NEXT: retq # encoding: [0xc3]
11949 %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
11950 ret <8 x float> %res
11951 }
11953 define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
11954 ; X86-LABEL: test_mask_expand_load_q_256:
11956 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11957 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11958 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11959 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
11960 ; X86-NEXT: retl # encoding: [0xc3]
11962 ; X64-LABEL: test_mask_expand_load_q_256:
11964 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11965 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
11966 ; X64-NEXT: retq # encoding: [0xc3]
11967 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11968 ret <4 x i64> %res
11969 }
11971 define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
11972 ; X86-LABEL: test_maskz_expand_load_q_256:
11974 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11975 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
11976 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
11977 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
11978 ; X86-NEXT: retl # encoding: [0xc3]
11980 ; X64-LABEL: test_maskz_expand_load_q_256:
11982 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
11983 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
11984 ; X64-NEXT: retq # encoding: [0xc3]
11985 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
11986 ret <4 x i64> %res
11987 }
11989 declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
11991 define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
11992 ; X86-LABEL: test_expand_load_q_256:
11994 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
11995 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
11996 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
11997 ; X86-NEXT: retl # encoding: [0xc3]
11999 ; X64-LABEL: test_expand_load_q_256:
12001 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12002 ; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
12003 ; X64-NEXT: retq # encoding: [0xc3]
12004 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
12005 ret <4 x i64> %res
12006 }
12008 define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
12009 ; X86-LABEL: test_mask_expand_load_d_256:
12011 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12012 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
12013 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
12014 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
12015 ; X86-NEXT: retl # encoding: [0xc3]
12017 ; X64-LABEL: test_mask_expand_load_d_256:
12019 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12020 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
12021 ; X64-NEXT: retq # encoding: [0xc3]
12022 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
12023 ret <8 x i32> %res
12024 }
12026 define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
12027 ; X86-LABEL: test_maskz_expand_load_d_256:
12029 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12030 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
12031 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
12032 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
12033 ; X86-NEXT: retl # encoding: [0xc3]
12035 ; X64-LABEL: test_maskz_expand_load_d_256:
12037 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12038 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
12039 ; X64-NEXT: retq # encoding: [0xc3]
12040 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
12041 ret <8 x i32> %res
12042 }
12044 declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
12046 define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
12047 ; X86-LABEL: test_expand_load_d_256:
12049 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
12050 ; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12051 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
12052 ; X86-NEXT: retl # encoding: [0xc3]
12054 ; X64-LABEL: test_expand_load_d_256:
12056 ; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
12057 ; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
12058 ; X64-NEXT: retq # encoding: [0xc3]
12059 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
12060 ret <8 x i32> %res
12061 }
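; Masked sqrt tests: the passthrough operand is zeroinitializer, so the lowering uses the
; zero-masked ({%k1} {z}) form of vsqrtpd/vsqrtps.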
12063 define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
12064 ; X86-LABEL: test_sqrt_pd_256:
12066 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12067 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12068 ; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
12069 ; X86-NEXT: retl # encoding: [0xc3]
12071 ; X64-LABEL: test_sqrt_pd_256:
12073 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12074 ; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
12075 ; X64-NEXT: retq # encoding: [0xc3]
12076 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
12077 ret <4 x double> %res
12078 }
12079 declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
12081 define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
12082 ; X86-LABEL: test_sqrt_ps_256:
12084 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12085 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12086 ; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
12087 ; X86-NEXT: retl # encoding: [0xc3]
12089 ; X64-LABEL: test_sqrt_ps_256:
12091 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12092 ; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
12093 ; X64-NEXT: retq # encoding: [0xc3]
12094 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
12095 ret <8 x float> %res
12096 }
12098 declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
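; Rotate intrinsics. prorv.* rotates each element right by a variable, per-element amount
; (vprorvd/vprorvq); every test checks the unmasked, merge-masked, and zero-masked forms and
; sums the three results with vpaddd/vpaddq.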
12100 declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
12102 define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12103 ; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128:
12105 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12106 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12107 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12108 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12109 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12110 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12111 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12112 ; X86-NEXT: retl # encoding: [0xc3]
12114 ; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128:
12116 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12117 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12118 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12119 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12120 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12121 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12122 ; X64-NEXT: retq # encoding: [0xc3]
12123 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
12124 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
12125 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
12126 %res3 = add <4 x i32> %res, %res1
12127 %res4 = add <4 x i32> %res3, %res2
12128 ret <4 x i32> %res4
12129 }
12131 declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
12133 define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12134 ; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256:
12136 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12137 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12138 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12139 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12140 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12141 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12142 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12143 ; X86-NEXT: retl # encoding: [0xc3]
12145 ; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256:
12147 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12148 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12149 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12150 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12151 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12152 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12153 ; X64-NEXT: retq # encoding: [0xc3]
12154 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
12155 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
12156 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
12157 %res3 = add <8 x i32> %res, %res1
12158 %res4 = add <8 x i32> %res3, %res2
12159 ret <8 x i32> %res4
12160 }
12162 declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
12164 define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12165 ; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128:
12167 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12168 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12169 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12170 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12171 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12172 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12173 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12174 ; X86-NEXT: retl # encoding: [0xc3]
12176 ; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128:
12178 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12179 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12180 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12181 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12182 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12183 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12184 ; X64-NEXT: retq # encoding: [0xc3]
12185 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
12186 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
12187 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
12188 %res3 = add <2 x i64> %res, %res1
12189 %res4 = add <2 x i64> %res3, %res2
12190 ret <2 x i64> %res4
12191 }
12193 declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
12195 define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12196 ; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256:
12198 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12199 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12200 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12201 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12202 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12203 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12204 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12205 ; X86-NEXT: retl # encoding: [0xc3]
12207 ; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256:
12209 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12210 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12211 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12212 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12213 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12214 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12215 ; X64-NEXT: retq # encoding: [0xc3]
12216 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
12217 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
12218 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
12219 %res3 = add <4 x i64> %res, %res1
12220 %res4 = add <4 x i64> %res3, %res2
12221 ret <4 x i64> %res4
12222 }
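; prol.* rotates left by an immediate count (vprold/vprolq); these tests hard-code a count of 3.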
12224 declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)
12226 define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12227 ; X86-LABEL: test_int_x86_avx512_mask_prol_d_128:
12229 ; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12230 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12231 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12232 ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12233 ; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12234 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12235 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12236 ; X86-NEXT: retl # encoding: [0xc3]
12238 ; X64-LABEL: test_int_x86_avx512_mask_prol_d_128:
12240 ; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12241 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12242 ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12243 ; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12244 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12245 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12246 ; X64-NEXT: retq # encoding: [0xc3]
12247 %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
12248 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
12249 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
12250 %res3 = add <4 x i32> %res, %res1
12251 %res4 = add <4 x i32> %res3, %res2
12252 ret <4 x i32> %res4
12253 }
12255 declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)
12257 define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12258 ; X86-LABEL: test_int_x86_avx512_mask_prol_d_256:
12260 ; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12261 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12262 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12263 ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12264 ; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12265 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12266 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12267 ; X86-NEXT: retl # encoding: [0xc3]
12269 ; X64-LABEL: test_int_x86_avx512_mask_prol_d_256:
12271 ; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12272 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12273 ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12274 ; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12275 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12276 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12277 ; X64-NEXT: retq # encoding: [0xc3]
12278 %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
12279 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
12280 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
12281 %res3 = add <8 x i32> %res, %res1
12282 %res4 = add <8 x i32> %res3, %res2
12283 ret <8 x i32> %res4
12284 }
12286 declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)
12288 define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12289 ; X86-LABEL: test_int_x86_avx512_mask_prol_q_128:
12291 ; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12292 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12293 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12294 ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12295 ; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12296 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12297 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12298 ; X86-NEXT: retl # encoding: [0xc3]
12300 ; X64-LABEL: test_int_x86_avx512_mask_prol_q_128:
12302 ; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12303 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12304 ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12305 ; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12306 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12307 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12308 ; X64-NEXT: retq # encoding: [0xc3]
12309 %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
12310 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
12311 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
12312 %res3 = add <2 x i64> %res, %res1
12313 %res4 = add <2 x i64> %res3, %res2
12314 ret <2 x i64> %res4
12315 }
12317 declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)
12319 define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12320 ; X86-LABEL: test_int_x86_avx512_mask_prol_q_256:
12322 ; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12323 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12324 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12325 ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12326 ; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12327 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12328 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12329 ; X86-NEXT: retl # encoding: [0xc3]
12331 ; X64-LABEL: test_int_x86_avx512_mask_prol_q_256:
12333 ; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12334 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12335 ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12336 ; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12337 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12338 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12339 ; X64-NEXT: retq # encoding: [0xc3]
12340 %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
12341 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
12342 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
12343 %res3 = add <4 x i64> %res, %res1
12344 %res4 = add <4 x i64> %res3, %res2
12345 ret <4 x i64> %res4
12346 }
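; prolv.* rotates left by a variable, per-element amount (vprolvd/vprolvq).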
12348 declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
12350 define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12351 ; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128:
12353 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12354 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12355 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12356 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12357 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12358 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12359 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12360 ; X86-NEXT: retl # encoding: [0xc3]
12362 ; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128:
12364 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12365 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12366 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12367 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12368 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12369 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12370 ; X64-NEXT: retq # encoding: [0xc3]
12371 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
12372 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
12373 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
12374 %res3 = add <4 x i32> %res, %res1
12375 %res4 = add <4 x i32> %res3, %res2
12376 ret <4 x i32> %res4
12377 }
12379 declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
12381 define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12382 ; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256:
12384 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12385 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12386 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12387 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12388 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12389 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12390 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12391 ; X86-NEXT: retl # encoding: [0xc3]
12393 ; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256:
12395 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12396 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12397 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12398 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12399 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12400 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12401 ; X64-NEXT: retq # encoding: [0xc3]
12402 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
12403 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
12404 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
12405 %res3 = add <8 x i32> %res, %res1
12406 %res4 = add <8 x i32> %res3, %res2
12407 ret <8 x i32> %res4
12408 }
12410 declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
12412 define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12413 ; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128:
12415 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12416 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12417 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12418 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12419 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12420 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12421 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12422 ; X86-NEXT: retl # encoding: [0xc3]
12424 ; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128:
12426 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12427 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12428 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12429 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12430 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12431 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12432 ; X64-NEXT: retq # encoding: [0xc3]
12433 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
12434 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
12435 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
12436 %res3 = add <2 x i64> %res, %res1
12437 %res4 = add <2 x i64> %res3, %res2
12438 ret <2 x i64> %res4
12439 }
12441 declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
12443 define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12444 ; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256:
12446 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
12447 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12448 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12449 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
12450 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
12451 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12452 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12453 ; X86-NEXT: retl # encoding: [0xc3]
12455 ; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256:
12457 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
12458 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12459 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
12460 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
12461 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12462 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12463 ; X64-NEXT: retq # encoding: [0xc3]
12464 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
12465 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
12466 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
12467 %res3 = add <4 x i64> %res, %res1
12468 %res4 = add <4 x i64> %res3, %res2
12469 ret <4 x i64> %res4
12470 }
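; pror.* rotates right by an immediate count (vprord/vprorq), again with a count of 3.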
12472 declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)
12474 define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12475 ; X86-LABEL: test_int_x86_avx512_mask_pror_d_128:
12477 ; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
12478 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12479 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12480 ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
12481 ; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
12482 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12483 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12484 ; X86-NEXT: retl # encoding: [0xc3]
12486 ; X64-LABEL: test_int_x86_avx512_mask_pror_d_128:
12488 ; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
12489 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12490 ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
12491 ; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
12492 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12493 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12494 ; X64-NEXT: retq # encoding: [0xc3]
12495 %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
12496 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
12497 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
12498 %res3 = add <4 x i32> %res, %res1
12499 %res4 = add <4 x i32> %res3, %res2
12500 ret <4 x i32> %res4
12501 }
12503 declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)
12505 define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12506 ; X86-LABEL: test_int_x86_avx512_mask_pror_d_256:
12508 ; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
12509 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12510 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12511 ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
12512 ; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
12513 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12514 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12515 ; X86-NEXT: retl # encoding: [0xc3]
12517 ; X64-LABEL: test_int_x86_avx512_mask_pror_d_256:
12519 ; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
12520 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12521 ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
12522 ; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
12523 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12524 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12525 ; X64-NEXT: retq # encoding: [0xc3]
12526 %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
12527 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
12528 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
12529 %res3 = add <8 x i32> %res, %res1
12530 %res4 = add <8 x i32> %res3, %res2
12531 ret <8 x i32> %res4
12532 }
12534 declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)
12536 define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12537 ; X86-LABEL: test_int_x86_avx512_mask_pror_q_128:
12539 ; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
12540 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12541 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12542 ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
12543 ; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
12544 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12545 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12546 ; X86-NEXT: retl # encoding: [0xc3]
12548 ; X64-LABEL: test_int_x86_avx512_mask_pror_q_128:
12550 ; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
12551 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12552 ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
12553 ; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
12554 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12555 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12556 ; X64-NEXT: retq # encoding: [0xc3]
12557 %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
12558 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
12559 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
12560 %res3 = add <2 x i64> %res, %res1
12561 %res4 = add <2 x i64> %res3, %res2
12562 ret <2 x i64> %res4
12563 }
12565 declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)
12567 define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12568 ; X86-LABEL: test_int_x86_avx512_mask_pror_q_256:
12570 ; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
12571 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12572 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12573 ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
12574 ; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
12575 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12576 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12577 ; X86-NEXT: retl # encoding: [0xc3]
12579 ; X64-LABEL: test_int_x86_avx512_mask_pror_q_256:
12581 ; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
12582 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12583 ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
12584 ; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
12585 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12586 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12587 ; X64-NEXT: retq # encoding: [0xc3]
12588 %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
12589 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
12590 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
12591 %res3 = add <4 x i64> %res, %res1
12592 %res4 = add <4 x i64> %res3, %res2
12593 ret <4 x i64> %res4
12594 }
12596 declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>)
12598 define <4 x i32>@test_int_x86_avx512_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12599 ; X86-LABEL: test_int_x86_avx512_prorv_d_128:
12601 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12602 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12603 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12604 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12605 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12606 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12607 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12608 ; X86-NEXT: retl # encoding: [0xc3]
12610 ; X64-LABEL: test_int_x86_avx512_prorv_d_128:
12612 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
12613 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12614 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
12615 ; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
12616 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12617 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12618 ; X64-NEXT: retq # encoding: [0xc3]
12619 %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12620 %2 = bitcast i8 %x3 to <8 x i1>
12621 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12622 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12623 %4 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12624 %5 = bitcast i8 %x3 to <8 x i1>
12625 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12626 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12627 %7 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12628 %res3 = add <4 x i32> %3, %6
12629 %res4 = add <4 x i32> %res3, %7
12630 ret <4 x i32> %res4
12631 }
12633 declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>)
12635 define <8 x i32>@test_int_x86_avx512_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12636 ; X86-LABEL: test_int_x86_avx512_prorv_d_256:
12638 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12640 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12641 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12642 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12643 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12644 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12645 ; X86-NEXT: retl # encoding: [0xc3]
12647 ; X64-LABEL: test_int_x86_avx512_prorv_d_256:
12649 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
12650 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12651 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
12652 ; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
12653 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12654 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12655 ; X64-NEXT: retq # encoding: [0xc3]
12656 %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12657 %2 = bitcast i8 %x3 to <8 x i1>
12658 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12659 %4 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12660 %5 = bitcast i8 %x3 to <8 x i1>
12661 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12662 %7 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12663 %res3 = add <8 x i32> %3, %6
12664 %res4 = add <8 x i32> %res3, %7
12665 ret <8 x i32> %res4
12666 }
12668 declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>)
12670 define <2 x i64>@test_int_x86_avx512_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12671 ; X86-LABEL: test_int_x86_avx512_prorv_q_128:
12673 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12674 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12675 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12676 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12677 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12678 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12679 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12680 ; X86-NEXT: retl # encoding: [0xc3]
12682 ; X64-LABEL: test_int_x86_avx512_prorv_q_128:
12684 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
12685 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12686 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
12687 ; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
12688 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12689 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12690 ; X64-NEXT: retq # encoding: [0xc3]
12691 %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12692 %2 = bitcast i8 %x3 to <8 x i1>
12693 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12694 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12695 %4 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12696 %5 = bitcast i8 %x3 to <8 x i1>
12697 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12698 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12699 %7 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12700 %res3 = add <2 x i64> %3, %6
12701 %res4 = add <2 x i64> %res3, %7
12702 ret <2 x i64> %res4
12703 }
12705 declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>)
12707 define <4 x i64>@test_int_x86_avx512_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
12708 ; X86-LABEL: test_int_x86_avx512_prorv_q_256:
12710 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12711 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12712 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12713 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12714 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12715 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12716 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12717 ; X86-NEXT: retl # encoding: [0xc3]
12719 ; X64-LABEL: test_int_x86_avx512_prorv_q_256:
12721 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
12722 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12723 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
12724 ; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
12725 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
12726 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
12727 ; X64-NEXT: retq # encoding: [0xc3]
12728 %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12729 %2 = bitcast i8 %x3 to <8 x i1>
12730 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12731 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
12732 %4 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12733 %5 = bitcast i8 %x3 to <8 x i1>
12734 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12735 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
12736 %7 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
12737 %res3 = add <4 x i64> %3, %6
12738 %res4 = add <4 x i64> %res3, %7
12739 ret <4 x i64> %res4
12740 }
12742 declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32)
12744 define <4 x i32>@test_int_x86_avx512_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
12745 ; X86-LABEL: test_int_x86_avx512_prol_d_128:
12747 ; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12748 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12749 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12750 ; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12751 ; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12752 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12753 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12754 ; X86-NEXT: retl # encoding: [0xc3]
12756 ; X64-LABEL: test_int_x86_avx512_prol_d_128:
12758 ; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
12759 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12760 ; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
12761 ; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
12762 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
12763 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
12764 ; X64-NEXT: retq # encoding: [0xc3]
12765 %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12766 %2 = bitcast i8 %x3 to <8 x i1>
12767 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12768 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12769 %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12770 %5 = bitcast i8 %x3 to <8 x i1>
12771 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12772 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12773 %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
12774 %res3 = add <4 x i32> %3, %6
12775 %res4 = add <4 x i32> %res3, %7
12776 ret <4 x i32> %res4
12777 }
12779 declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32)
12781 define <8 x i32>@test_int_x86_avx512_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
12782 ; X86-LABEL: test_int_x86_avx512_prol_d_256:
12784 ; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12785 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12786 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12787 ; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12788 ; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12789 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12790 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12791 ; X86-NEXT: retl # encoding: [0xc3]
12793 ; X64-LABEL: test_int_x86_avx512_prol_d_256:
12795 ; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
12796 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12797 ; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
12798 ; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
12799 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
12800 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
12801 ; X64-NEXT: retq # encoding: [0xc3]
12802 %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12803 %2 = bitcast i8 %x3 to <8 x i1>
12804 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12805 %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12806 %5 = bitcast i8 %x3 to <8 x i1>
12807 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12808 %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
12809 %res3 = add <8 x i32> %3, %6
12810 %res4 = add <8 x i32> %res3, %7
12811 ret <8 x i32> %res4
12812 }
12814 declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32)
12816 define <2 x i64>@test_int_x86_avx512_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
12817 ; X86-LABEL: test_int_x86_avx512_prol_q_128:
12819 ; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12820 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12821 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12822 ; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12823 ; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12824 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12825 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12826 ; X86-NEXT: retl # encoding: [0xc3]
12828 ; X64-LABEL: test_int_x86_avx512_prol_q_128:
12830 ; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
12831 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12832 ; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
12833 ; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
12834 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
12835 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
12836 ; X64-NEXT: retq # encoding: [0xc3]
12837 %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12838 %2 = bitcast i8 %x3 to <8 x i1>
12839 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12840 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12841 %4 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12842 %5 = bitcast i8 %x3 to <8 x i1>
12843 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12844 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12845 %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
12846 %res3 = add <2 x i64> %3, %6
12847 %res4 = add <2 x i64> %res3, %7
12848 ret <2 x i64> %res4
12849 }
12851 declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32)
12853 define <4 x i64>@test_int_x86_avx512_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
12854 ; X86-LABEL: test_int_x86_avx512_prol_q_256:
12856 ; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12857 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
12858 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12859 ; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12860 ; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12861 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12862 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12863 ; X86-NEXT: retl # encoding: [0xc3]
12865 ; X64-LABEL: test_int_x86_avx512_prol_q_256:
12867 ; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
12868 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
12869 ; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
12870 ; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
12871 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
12872 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
12873 ; X64-NEXT: retq # encoding: [0xc3]
12874 %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12875 %2 = bitcast i8 %x3 to <8 x i1>
12876 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12877 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
12878 %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12879 %5 = bitcast i8 %x3 to <8 x i1>
12880 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12881 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
12882 %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
12883 %res3 = add <4 x i64> %3, %6
12884 %res4 = add <4 x i64> %res3, %7
12885 ret <4 x i64> %res4
12886 }
12888 declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>)
12890 define <4 x i32>@test_int_x86_avx512_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
12891 ; X86-LABEL: test_int_x86_avx512_prolv_d_128:
12893 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12894 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12895 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12896 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12897 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12898 ; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12899 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12900 ; X86-NEXT: retl # encoding: [0xc3]
12902 ; X64-LABEL: test_int_x86_avx512_prolv_d_128:
12904 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
12905 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12906 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
12907 ; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
12908 ; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
12909 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
12910 ; X64-NEXT: retq # encoding: [0xc3]
12911 %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12912 %2 = bitcast i8 %x3 to <8 x i1>
12913 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12914 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
12915 %4 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12916 %5 = bitcast i8 %x3 to <8 x i1>
12917 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12918 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
12919 %7 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
12920 %res3 = add <4 x i32> %3, %6
12921 %res4 = add <4 x i32> %res3, %7
12922 ret <4 x i32> %res4
12923 }
12925 declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>)
12927 define <8 x i32>@test_int_x86_avx512_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
12928 ; X86-LABEL: test_int_x86_avx512_prolv_d_256:
12930 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12931 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12932 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12933 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12934 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12935 ; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12936 ; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12937 ; X86-NEXT: retl # encoding: [0xc3]
12939 ; X64-LABEL: test_int_x86_avx512_prolv_d_256:
12941 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
12942 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12943 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
12944 ; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
12945 ; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
12946 ; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
12947 ; X64-NEXT: retq # encoding: [0xc3]
12948 %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12949 %2 = bitcast i8 %x3 to <8 x i1>
12950 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
12951 %4 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12952 %5 = bitcast i8 %x3 to <8 x i1>
12953 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
12954 %7 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
12955 %res3 = add <8 x i32> %3, %6
12956 %res4 = add <8 x i32> %res3, %7
12957 ret <8 x i32> %res4
12958 }
12960 declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>)
12962 define <2 x i64>@test_int_x86_avx512_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
12963 ; X86-LABEL: test_int_x86_avx512_prolv_q_128:
12965 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12966 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
12967 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
12968 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12969 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12970 ; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12971 ; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12972 ; X86-NEXT: retl # encoding: [0xc3]
12974 ; X64-LABEL: test_int_x86_avx512_prolv_q_128:
12976 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
12977 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
12978 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
12979 ; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
12980 ; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
12981 ; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
12982 ; X64-NEXT: retq # encoding: [0xc3]
12983 %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12984 %2 = bitcast i8 %x3 to <8 x i1>
12985 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
12986 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
12987 %4 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12988 %5 = bitcast i8 %x3 to <8 x i1>
12989 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
12990 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
12991 %7 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
12992 %res3 = add <2 x i64> %3, %6
12993 %res4 = add <2 x i64> %res3, %7
12994 ret <2 x i64> %res4
12995 }
12997 declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>)
12999 define <4 x i64>@test_int_x86_avx512_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
13000 ; X86-LABEL: test_int_x86_avx512_prolv_q_256:
13002 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
13003 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13004 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13005 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
13006 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
13007 ; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
13008 ; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
13009 ; X86-NEXT: retl # encoding: [0xc3]
13011 ; X64-LABEL: test_int_x86_avx512_prolv_q_256:
13013 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
13014 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13015 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
13016 ; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
13017 ; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
13018 ; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
13019 ; X64-NEXT: retq # encoding: [0xc3]
13020 %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13021 %2 = bitcast i8 %x3 to <8 x i1>
13022 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13023 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
13024 %4 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13025 %5 = bitcast i8 %x3 to <8 x i1>
13026 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13027 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
13028 %7 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
13029 %res3 = add <4 x i64> %3, %6
13030 %res4 = add <4 x i64> %res3, %7
13031 ret <4 x i64> %res4
13032 }
13034 declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32)
13036 define <4 x i32>@test_int_x86_avx512_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
13037 ; X86-LABEL: test_int_x86_avx512_pror_d_128:
13039 ; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
13040 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13041 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13042 ; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
13043 ; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
13044 ; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
13045 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
13046 ; X86-NEXT: retl # encoding: [0xc3]
13048 ; X64-LABEL: test_int_x86_avx512_pror_d_128:
13050 ; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
13051 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13052 ; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
13053 ; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
13054 ; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
13055 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
13056 ; X64-NEXT: retq # encoding: [0xc3]
13057 %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13058 %2 = bitcast i8 %x3 to <8 x i1>
13059 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13060 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
13061 %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13062 %5 = bitcast i8 %x3 to <8 x i1>
13063 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13064 %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
13065 %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
13066 %res3 = add <4 x i32> %3, %6
13067 %res4 = add <4 x i32> %res3, %7
13068 ret <4 x i32> %res4
13069 }
13071 declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32)
13073 define <8 x i32>@test_int_x86_avx512_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
13074 ; X86-LABEL: test_int_x86_avx512_pror_d_256:
13076 ; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
13077 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13078 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13079 ; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
13080 ; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
13081 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
13082 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
13083 ; X86-NEXT: retl # encoding: [0xc3]
13085 ; X64-LABEL: test_int_x86_avx512_pror_d_256:
13087 ; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
13088 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13089 ; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
13090 ; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
13091 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
13092 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
13093 ; X64-NEXT: retq # encoding: [0xc3]
13094 %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13095 %2 = bitcast i8 %x3 to <8 x i1>
13096 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
13097 %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13098 %5 = bitcast i8 %x3 to <8 x i1>
13099 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
13100 %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
13101 %res3 = add <8 x i32> %3, %6
13102 %res4 = add <8 x i32> %res3, %7
13103 ret <8 x i32> %res4
13104 }
13106 declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32)
13108 define <2 x i64>@test_int_x86_avx512_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
13109 ; X86-LABEL: test_int_x86_avx512_pror_q_128:
13111 ; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
13112 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13113 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13114 ; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
13115 ; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
13116 ; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
13117 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
13118 ; X86-NEXT: retl # encoding: [0xc3]
13120 ; X64-LABEL: test_int_x86_avx512_pror_q_128:
13122 ; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
13123 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13124 ; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
13125 ; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
13126 ; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
13127 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
13128 ; X64-NEXT: retq # encoding: [0xc3]
13129 %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13130 %2 = bitcast i8 %x3 to <8 x i1>
13131 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
13132 %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
13133 %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13134 %5 = bitcast i8 %x3 to <8 x i1>
13135 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
13136 %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
13137 %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
13138 %res3 = add <2 x i64> %3, %6
13139 %res4 = add <2 x i64> %res3, %7
13140 ret <2 x i64> %res4
13141 }
13143 declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32)
13145 define <4 x i64>@test_int_x86_avx512_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
13146 ; X86-LABEL: test_int_x86_avx512_pror_q_256:
13148 ; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
13149 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
13150 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13151 ; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
13152 ; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
13153 ; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
13154 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
13155 ; X86-NEXT: retl # encoding: [0xc3]
13157 ; X64-LABEL: test_int_x86_avx512_pror_q_256:
13159 ; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
13160 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13161 ; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
13162 ; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
13163 ; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
13164 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
13165 ; X64-NEXT: retq # encoding: [0xc3]
13166 %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13167 %2 = bitcast i8 %x3 to <8 x i1>
13168 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13169 %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
13170 %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13171 %5 = bitcast i8 %x3 to <8 x i1>
13172 %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
13173 %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
13174 %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
13175 %res3 = add <4 x i64> %3, %6
13176 %res4 = add <4 x i64> %res3, %7
13177 ret <4 x i64> %res4
13178 }
13180 declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13182 define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13183 ; CHECK-LABEL: test_vfmadd256_ps:
13185 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
13186 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13187 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13188 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13189 ret <8 x float> %res
13190 }
13192 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13193 ; X86-LABEL: test_mask_vfmadd256_ps:
13195 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13196 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13197 ; X86-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
13198 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13199 ; X86-NEXT: retl # encoding: [0xc3]
13201 ; X64-LABEL: test_mask_vfmadd256_ps:
13203 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13204 ; X64-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
13205 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13206 ; X64-NEXT: retq # encoding: [0xc3]
13207 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13208 ret <8 x float> %res
13209 }
13211 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13213 define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13214 ; CHECK-LABEL: test_vfmadd128_ps:
13216 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
13217 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13218 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13219 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13220 ret <4 x float> %res
13221 }
13223 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13224 ; X86-LABEL: test_mask_vfmadd128_ps:
13226 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13227 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13228 ; X86-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
13229 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13230 ; X86-NEXT: retl # encoding: [0xc3]
13232 ; X64-LABEL: test_mask_vfmadd128_ps:
13234 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13235 ; X64-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
13236 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13237 ; X64-NEXT: retq # encoding: [0xc3]
13238 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13239 ret <4 x float> %res
13240 }
13242 declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13244 define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
13245 ; CHECK-LABEL: test_fmadd256_pd:
13247 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
13248 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13249 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13250 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1)
13251 ret <4 x double> %res
13252 }
13254 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
13255 ; X86-LABEL: test_mask_fmadd256_pd:
13257 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13258 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13259 ; X86-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
13260 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13261 ; X86-NEXT: retl # encoding: [0xc3]
13263 ; X64-LABEL: test_mask_fmadd256_pd:
13265 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13266 ; X64-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
13267 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
13268 ; X64-NEXT: retq # encoding: [0xc3]
13269 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
13270 ret <4 x double> %res
13271 }
13273 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13275 define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
13276 ; CHECK-LABEL: test_fmadd128_pd:
13278 ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
13279 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13280 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13281 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1)
13282 ret <2 x double> %res
13283 }
13285 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
13286 ; X86-LABEL: test_mask_fmadd128_pd:
13288 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13289 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13290 ; X86-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
13291 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13292 ; X86-NEXT: retl # encoding: [0xc3]
13294 ; X64-LABEL: test_mask_fmadd128_pd:
13296 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13297 ; X64-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
13298 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
13299 ; X64-NEXT: retq # encoding: [0xc3]
13300 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
13301 ret <2 x double> %res
13302 }
13304 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13306 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13307 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
13309 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13310 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13311 ; X86-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
13312 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13313 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13314 ; X86-NEXT: retl # encoding: [0xc3]
13316 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
13318 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13319 ; X64-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
13320 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13321 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13322 ; X64-NEXT: retq # encoding: [0xc3]
13323 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13324 ret <2 x double> %res
13325 }
13327 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13329 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13330 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
13332 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13333 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13334 ; X86-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
13335 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13336 ; X86-NEXT: retl # encoding: [0xc3]
13338 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
13340 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13341 ; X64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
13342 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13343 ; X64-NEXT: retq # encoding: [0xc3]
13344 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13345 ret <2 x double> %res
13346 }
13348 declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13350 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13351 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
13353 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13354 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13355 ; X86-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
13356 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13357 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13358 ; X86-NEXT: retl # encoding: [0xc3]
13360 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
13362 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13363 ; X64-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
13364 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13365 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13366 ; X64-NEXT: retq # encoding: [0xc3]
13367 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13368 ret <4 x double> %res
13369 }
13371 declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13373 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13374 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
13376 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13377 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13378 ; X86-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
13379 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13380 ; X86-NEXT: retl # encoding: [0xc3]
13382 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
13384 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13385 ; X64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
13386 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13387 ; X64-NEXT: retq # encoding: [0xc3]
13388 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13389 ret <4 x double> %res
13390 }
13392 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13394 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13395 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
13397 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13398 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13399 ; X86-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
13400 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13401 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13402 ; X86-NEXT: retl # encoding: [0xc3]
13404 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
13406 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13407 ; X64-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
13408 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
13409 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13410 ; X64-NEXT: retq # encoding: [0xc3]
13411 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13412 ret <4 x float> %res
13413 }
13415 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13417 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13418 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
13420 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13421 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13422 ; X86-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
13423 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13424 ; X86-NEXT: retl # encoding: [0xc3]
13426 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
13428 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13429 ; X64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
13430 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
13431 ; X64-NEXT: retq # encoding: [0xc3]
13432 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13433 ret <4 x float> %res
13434 }
13436 declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13438 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13439 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
13441 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13442 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13443 ; X86-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
13444 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13445 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13446 ; X86-NEXT: retl # encoding: [0xc3]
13448 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
13450 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13451 ; X64-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
13452 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
13453 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13454 ; X64-NEXT: retq # encoding: [0xc3]
13455 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13456 ret <8 x float> %res
13457 }
13459 declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13461 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13462 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
13464 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13465 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13466 ; X86-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
13467 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13468 ; X86-NEXT: retl # encoding: [0xc3]
13470 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
13472 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13473 ; X64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
13474 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
13475 ; X64-NEXT: retq # encoding: [0xc3]
13476 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13477 ret <8 x float> %res
13481 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13483 define <2 x double> @test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13484 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
13486 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13487 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13488 ; X86-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
13489 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13490 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13491 ; X86-NEXT: retl # encoding: [0xc3]
13493 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
13495 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13496 ; X64-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
13497 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13498 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13499 ; X64-NEXT: retq # encoding: [0xc3]
13500 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13501 ret <2 x double> %res
13505 declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13507 define <4 x double> @test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13508 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
13510 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13511 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13512 ; X86-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
13513 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13514 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13515 ; X86-NEXT: retl # encoding: [0xc3]
13517 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
13519 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13520 ; X64-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
13521 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13522 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13523 ; X64-NEXT: retq # encoding: [0xc3]
13524 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13525 ret <4 x double> %res
13528 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13530 define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13531 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
13533 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13534 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13535 ; X86-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
13536 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13537 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13538 ; X86-NEXT: retl # encoding: [0xc3]
13540 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
13542 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13543 ; X64-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
13544 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
13545 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13546 ; X64-NEXT: retq # encoding: [0xc3]
13547 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13548 ret <4 x float> %res
13551 declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13553 define <8 x float> @test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13554 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
13556 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13557 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13558 ; X86-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
13559 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13560 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13561 ; X86-NEXT: retl # encoding: [0xc3]
13563 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
13565 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13566 ; X64-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
13567 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
13568 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13569 ; X64-NEXT: retq # encoding: [0xc3]
13570 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13571 ret <8 x float> %res
13574 declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13576 define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13577 ; CHECK-LABEL: test_vfnmadd256_ps:
13579 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
13580 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
13581 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13582 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13583 ret <8 x float> %res
13586 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13587 ; X86-LABEL: test_mask_vfnmadd256_ps:
13589 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13590 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13591 ; X86-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
13592 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13593 ; X86-NEXT: retl # encoding: [0xc3]
13595 ; X64-LABEL: test_mask_vfnmadd256_ps:
13597 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13598 ; X64-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
13599 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13600 ; X64-NEXT: retq # encoding: [0xc3]
13601 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13602 ret <8 x float> %res
13605 declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13607 define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13608 ; CHECK-LABEL: test_vfnmadd128_ps:
13610 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
13611 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
13612 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13613 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13614 ret <4 x float> %res
13617 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13618 ; X86-LABEL: test_mask_vfnmadd128_ps:
13620 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13621 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13622 ; X86-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
13623 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13624 ; X86-NEXT: retl # encoding: [0xc3]
13626 ; X64-LABEL: test_mask_vfnmadd128_ps:
13628 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13629 ; X64-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
13630 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13631 ; X64-NEXT: retq # encoding: [0xc3]
13632 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13633 ret <4 x float> %res
13636 declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13638 define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13639 ; CHECK-LABEL: test_vfnmadd256_pd:
13641 ; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
13642 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
13643 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13644 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13645 ret <4 x double> %res
13648 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13649 ; X86-LABEL: test_mask_vfnmadd256_pd:
13651 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13652 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13653 ; X86-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
13654 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13655 ; X86-NEXT: retl # encoding: [0xc3]
13657 ; X64-LABEL: test_mask_vfnmadd256_pd:
13659 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13660 ; X64-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
13661 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
13662 ; X64-NEXT: retq # encoding: [0xc3]
13663 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
13664 ret <4 x double> %res
13667 declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
13669 define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
13670 ; CHECK-LABEL: test_vfnmadd128_pd:
13672 ; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
13673 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
13674 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13675 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
13676 ret <2 x double> %res
13679 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
13680 ; X86-LABEL: test_mask_vfnmadd128_pd:
13682 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13683 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13684 ; X86-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
13685 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13686 ; X86-NEXT: retl # encoding: [0xc3]
13688 ; X64-LABEL: test_mask_vfnmadd128_pd:
13690 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13691 ; X64-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
13692 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
13693 ; X64-NEXT: retq # encoding: [0xc3]
13694 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
13695 ret <2 x double> %res
13698 declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13700 define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
13701 ; CHECK-LABEL: test_vfnmsub256_ps:
13703 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
13704 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
13705 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13706 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
13707 ret <8 x float> %res
13710 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
13711 ; X86-LABEL: test_mask_vfnmsub256_ps:
13713 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13714 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13715 ; X86-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
13716 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13717 ; X86-NEXT: retl # encoding: [0xc3]
13719 ; X64-LABEL: test_mask_vfnmsub256_ps:
13721 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13722 ; X64-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
13723 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13724 ; X64-NEXT: retq # encoding: [0xc3]
13725 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
13726 ret <8 x float> %res
13729 declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13731 define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
13732 ; CHECK-LABEL: test_vfnmsub128_ps:
13734 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
13735 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
13736 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13737 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
13738 ret <4 x float> %res
13741 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
13742 ; X86-LABEL: test_mask_vfnmsub128_ps:
13744 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13745 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13746 ; X86-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
13747 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13748 ; X86-NEXT: retl # encoding: [0xc3]
13750 ; X64-LABEL: test_mask_vfnmsub128_ps:
13752 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13753 ; X64-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
13754 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13755 ; X64-NEXT: retq # encoding: [0xc3]
13756 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
13757 ret <4 x float> %res
13760 declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13762 define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13763 ; CHECK-LABEL: test_vfnmsub256_pd:
13765 ; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
13766 ; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
13767 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13768 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13769 ret <4 x double> %res
13772 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13773 ; X86-LABEL: test_mask_vfnmsub256_pd:
13775 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13776 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13777 ; X86-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
13778 ; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13779 ; X86-NEXT: retl # encoding: [0xc3]
13781 ; X64-LABEL: test_mask_vfnmsub256_pd:
13783 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13784 ; X64-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
13785 ; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
13786 ; X64-NEXT: retq # encoding: [0xc3]
13787 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
13788 ret <4 x double> %res
13791 declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
13793 define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
13794 ; CHECK-LABEL: test_vfnmsub128_pd:
13796 ; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
13797 ; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
13798 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13799 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
13800 ret <2 x double> %res
13803 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
13804 ; X86-LABEL: test_mask_vfnmsub128_pd:
13806 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13807 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13808 ; X86-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
13809 ; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13810 ; X86-NEXT: retl # encoding: [0xc3]
13812 ; X64-LABEL: test_mask_vfnmsub128_pd:
13814 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13815 ; X64-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
13816 ; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
13817 ; X64-NEXT: retq # encoding: [0xc3]
13818 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
13819 ret <2 x double> %res
13822 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
13824 define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
13825 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
13827 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13828 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13829 ; X86-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
13830 ; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13831 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13832 ; X86-NEXT: retl # encoding: [0xc3]
13834 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
13836 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13837 ; X64-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
13838 ; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13839 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
13840 ; X64-NEXT: retq # encoding: [0xc3]
13841 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
13842 ret <2 x double> %res
13845 declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
13847 define <4 x double> @test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
13848 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
13850 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13851 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13852 ; X86-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
13853 ; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13854 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13855 ; X86-NEXT: retl # encoding: [0xc3]
13857 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
13859 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13860 ; X64-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
13861 ; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13862 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
13863 ; X64-NEXT: retq # encoding: [0xc3]
13864 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
13865 ret <4 x double> %res
13868 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
13870 define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
13871 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
13873 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13874 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13875 ; X86-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
13876 ; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13877 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13878 ; X86-NEXT: retl # encoding: [0xc3]
13880 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
13882 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13883 ; X64-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
13884 ; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
13885 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
13886 ; X64-NEXT: retq # encoding: [0xc3]
13887 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
13888 ret <4 x float> %res
13891 declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
13893 define <8 x float> @test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
13894 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
13896 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13897 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13898 ; X86-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
13899 ; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13900 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13901 ; X86-NEXT: retl # encoding: [0xc3]
13903 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
13905 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13906 ; X64-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
13907 ; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
13908 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
13909 ; X64-NEXT: retq # encoding: [0xc3]
13910 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
13911 ret <8 x float> %res
13914 declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
13916 define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
13917 ; CHECK-LABEL: test_fmaddsub256_ps:
13919 ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
13920 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
13921 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13922 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1)
13923 ret <8 x float> %res
13926 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
13927 ; X86-LABEL: test_mask_fmaddsub256_ps:
13929 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13930 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13931 ; X86-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
13932 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13933 ; X86-NEXT: retl # encoding: [0xc3]
13935 ; X64-LABEL: test_mask_fmaddsub256_ps:
13937 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13938 ; X64-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
13939 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13940 ; X64-NEXT: retq # encoding: [0xc3]
13941 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
13942 ret <8 x float> %res
13945 declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
13947 define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
13948 ; CHECK-LABEL: test_fmaddsub128_ps:
13950 ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
13951 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
13952 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13953 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1)
13954 ret <4 x float> %res
13957 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
13958 ; X86-LABEL: test_mask_fmaddsub128_ps:
13960 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13961 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13962 ; X86-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
13963 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
13964 ; X86-NEXT: retl # encoding: [0xc3]
13966 ; X64-LABEL: test_mask_fmaddsub128_ps:
13968 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
13969 ; X64-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
13970 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
13971 ; X64-NEXT: retq # encoding: [0xc3]
13972 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
13973 ret <4 x float> %res
13976 declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
13978 define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
13979 ; CHECK-LABEL: test_vfmaddsub256_pd:
13981 ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
13982 ; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
13983 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
13984 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
13985 ret <4 x double> %res
13988 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
13989 ; X86-LABEL: test_mask_vfmaddsub256_pd:
13991 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
13992 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
13993 ; X86-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
13994 ; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
13995 ; X86-NEXT: retl # encoding: [0xc3]
13997 ; X64-LABEL: test_mask_vfmaddsub256_pd:
13999 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14000 ; X64-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
14001 ; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
14002 ; X64-NEXT: retq # encoding: [0xc3]
14003 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
14004 ret <4 x double> %res
14007 declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
14009 define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
14010 ; CHECK-LABEL: test_vfmaddsub128_pd:
14012 ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
14013 ; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14014 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14015 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
14016 ret <2 x double> %res
14019 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
14020 ; X86-LABEL: test_mask_vfmaddsub128_pd:
14022 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14023 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14024 ; X86-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
14025 ; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
14026 ; X86-NEXT: retl # encoding: [0xc3]
14028 ; X64-LABEL: test_mask_vfmaddsub128_pd:
14030 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14031 ; X64-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
14032 ; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
14033 ; X64-NEXT: retq # encoding: [0xc3]
14034 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
14035 ret <2 x double> %res
14038 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14040 define <2 x double> @test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14041 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
14043 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14044 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14045 ; X86-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
14046 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14047 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14048 ; X86-NEXT: retl # encoding: [0xc3]
14050 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
14052 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14053 ; X64-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
14054 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14055 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14056 ; X64-NEXT: retq # encoding: [0xc3]
14057 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14058 ret <2 x double> %res
14061 declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14063 define <2 x double> @test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14064 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
14066 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14067 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14068 ; X86-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
14069 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14070 ; X86-NEXT: retl # encoding: [0xc3]
14072 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
14074 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14075 ; X64-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
14076 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14077 ; X64-NEXT: retq # encoding: [0xc3]
14078 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14079 ret <2 x double> %res
14082 declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14084 define <4 x double> @test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14085 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
14087 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14088 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14089 ; X86-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
14090 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14091 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14092 ; X86-NEXT: retl # encoding: [0xc3]
14094 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
14096 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14097 ; X64-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
14098 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14099 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14100 ; X64-NEXT: retq # encoding: [0xc3]
14101 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14102 ret <4 x double> %res
14105 declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14107 define <4 x double> @test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14108 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
14110 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14111 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14112 ; X86-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
14113 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14114 ; X86-NEXT: retl # encoding: [0xc3]
14116 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
14118 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14119 ; X64-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
14120 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14121 ; X64-NEXT: retq # encoding: [0xc3]
14122 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14123 ret <4 x double> %res
14126 declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14128 define <4 x float> @test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14129 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
14131 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14132 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14133 ; X86-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
14134 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14135 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14136 ; X86-NEXT: retl # encoding: [0xc3]
14138 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
14140 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14141 ; X64-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
14142 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
14143 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14144 ; X64-NEXT: retq # encoding: [0xc3]
14145 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14146 ret <4 x float> %res
14149 declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14151 define <4 x float> @test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14152 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
14154 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14155 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14156 ; X86-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
14157 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14158 ; X86-NEXT: retl # encoding: [0xc3]
14160 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
14162 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14163 ; X64-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
14164 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
14165 ; X64-NEXT: retq # encoding: [0xc3]
14166 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14167 ret <4 x float> %res
14170 declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14172 define <8 x float> @test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14173 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
14175 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14176 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14177 ; X86-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
14178 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14179 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14180 ; X86-NEXT: retl # encoding: [0xc3]
14182 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
14184 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14185 ; X64-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
14186 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
14187 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14188 ; X64-NEXT: retq # encoding: [0xc3]
14189 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14190 ret <8 x float> %res
14193 declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14195 define <8 x float> @test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14196 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
14198 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14199 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14200 ; X86-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
14201 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14202 ; X86-NEXT: retl # encoding: [0xc3]
14204 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
14206 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14207 ; X64-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
14208 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
14209 ; X64-NEXT: retq # encoding: [0xc3]
14210 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14211 ret <8 x float> %res
14214 declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
14216 define <2 x double> @test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
14217 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
14219 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14220 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14221 ; X86-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
14222 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14223 ; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14224 ; X86-NEXT: retl # encoding: [0xc3]
14226 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
14228 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14229 ; X64-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
14230 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14231 ; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
14232 ; X64-NEXT: retq # encoding: [0xc3]
14233 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
14234 ret <2 x double> %res
14237 declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
14239 define <4 x double> @test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
14240 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
14242 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14243 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14244 ; X86-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
14245 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14246 ; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14247 ; X86-NEXT: retl # encoding: [0xc3]
14249 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
14251 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14252 ; X64-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
14253 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14254 ; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
14255 ; X64-NEXT: retq # encoding: [0xc3]
14256 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
14257 ret <4 x double> %res
14260 declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
14262 define <4 x float> @test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
14263 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
14265 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14266 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14267 ; X86-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
14268 ; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14269 ; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14270 ; X86-NEXT: retl # encoding: [0xc3]
14272 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
14274 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14275 ; X64-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
14276 ; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
14277 ; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
14278 ; X64-NEXT: retq # encoding: [0xc3]
14279 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
14280 ret <4 x float> %res
14283 declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
14285 define <8 x float> @test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
14286 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
14288 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14289 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14290 ; X86-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
14291 ; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14292 ; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14293 ; X86-NEXT: retl # encoding: [0xc3]
14295 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
14297 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14298 ; X64-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
14299 ; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
14300 ; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
14301 ; X64-NEXT: retq # encoding: [0xc3]
14302 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
14303 ret <8 x float> %res
14307 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
14308 ; X86-LABEL: test_mask_vfmadd128_ps_rmk:
14310 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14311 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14312 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14313 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
14314 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14315 ; X86-NEXT: retl # encoding: [0xc3]
14317 ; X64-LABEL: test_mask_vfmadd128_ps_rmk:
14319 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14320 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
14321 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14322 ; X64-NEXT: retq # encoding: [0xc3]
14323 %a2 = load <4 x float>, <4 x float>* %ptr_a2
14324 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
14325 ret <4 x float> %res
14328 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
14329 ; X86-LABEL: test_mask_vfmadd128_ps_rmka:
14331 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14332 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14333 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14334 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
14335 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14336 ; X86-NEXT: retl # encoding: [0xc3]
14338 ; X64-LABEL: test_mask_vfmadd128_ps_rmka:
14340 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14341 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
14342 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14343 ; X64-NEXT: retq # encoding: [0xc3]
14344 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
14345 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
14346 ret <4 x float> %res
14349 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
14350 ; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
14352 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14353 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
14354 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14355 ; X86-NEXT: retl # encoding: [0xc3]
14357 ; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
14359 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
14360 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14361 ; X64-NEXT: retq # encoding: [0xc3]
14362 %a2 = load <4 x float>, <4 x float>* %ptr_a2
14363 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
14364 ret <4 x float> %res
14367 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
14368 ; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
14370 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14371 ; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
14372 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14373 ; X86-NEXT: retl # encoding: [0xc3]
14375 ; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
14377 ; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
14378 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14379 ; X64-NEXT: retq # encoding: [0xc3]
14380 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
14381 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
14382 ret <4 x float> %res
14385 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
14386 ; X86-LABEL: test_mask_vfmadd128_ps_rmb:
14388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14389 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14390 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14391 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
14392 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14393 ; X86-NEXT: retl # encoding: [0xc3]
14395 ; X64-LABEL: test_mask_vfmadd128_ps_rmb:
14397 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14398 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
14399 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14400 ; X64-NEXT: retq # encoding: [0xc3]
14401 %q = load float, float* %ptr_a2
14402 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14403 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14404 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14405 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14406 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
14407 ret <4 x float> %res
14410 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
14411 ; X86-LABEL: test_mask_vfmadd128_ps_rmba:
14413 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14414 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14415 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14416 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
14417 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14418 ; X86-NEXT: retl # encoding: [0xc3]
14420 ; X64-LABEL: test_mask_vfmadd128_ps_rmba:
14422 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14423 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
14424 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14425 ; X64-NEXT: retq # encoding: [0xc3]
14426 %q = load float, float* %ptr_a2, align 4
14427 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14428 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14429 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14430 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14431 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
14432 ret <4 x float> %res
14435 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
14436 ; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
14438 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14439 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
14440 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14441 ; X86-NEXT: retl # encoding: [0xc3]
14443 ; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
14445 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
14446 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14447 ; X64-NEXT: retq # encoding: [0xc3]
14448 %q = load float, float* %ptr_a2
14449 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14450 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14451 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14452 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14453 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
14454 ret <4 x float> %res
14457 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
14458 ; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
14460 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14461 ; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
14462 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14463 ; X86-NEXT: retl # encoding: [0xc3]
14465 ; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
14467 ; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
14468 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14469 ; X64-NEXT: retq # encoding: [0xc3]
14470 %q = load float, float* %ptr_a2, align 4
14471 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
14472 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
14473 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
14474 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
14475 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
14476 ret <4 x float> %res
14479 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
14480 ; X86-LABEL: test_mask_vfmadd128_pd_rmk:
14482 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14483 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14484 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14485 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
14486 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14487 ; X86-NEXT: retl # encoding: [0xc3]
14489 ; X64-LABEL: test_mask_vfmadd128_pd_rmk:
14491 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14492 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
14493 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14494 ; X64-NEXT: retq # encoding: [0xc3]
14495 %a2 = load <2 x double>, <2 x double>* %ptr_a2
14496 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
14497 ret <2 x double> %res
14500 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
14501 ; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
14503 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14504 ; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
14505 ; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14506 ; X86-NEXT: retl # encoding: [0xc3]
14508 ; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
14510 ; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
14511 ; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
14512 ; X64-NEXT: retq # encoding: [0xc3]
14513 %a2 = load <2 x double>, <2 x double>* %ptr_a2
14514 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
14515 ret <2 x double> %res
14518 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
14519 ; X86-LABEL: test_mask_vfmadd256_pd_rmk:
14521 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14522 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
14523 ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
14524 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
14525 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14526 ; X86-NEXT: retl # encoding: [0xc3]
14528 ; X64-LABEL: test_mask_vfmadd256_pd_rmk:
14530 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
14531 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
14532 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14533 ; X64-NEXT: retq # encoding: [0xc3]
14534 %a2 = load <4 x double>, <4 x double>* %ptr_a2
14535 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
14536 ret <4 x double> %res
14539 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
14540 ; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
14542 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
14543 ; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
14544 ; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14545 ; X86-NEXT: retl # encoding: [0xc3]
14547 ; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
14549 ; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
14550 ; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
14551 ; X64-NEXT: retq # encoding: [0xc3]
14552 %a2 = load <4 x double>, <4 x double>* %ptr_a2
14553 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
14554 ret <4 x double> %res
14557 declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
14559 define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
14560 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
14562 ; X86-NEXT: vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
14563 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14564 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14565 ; X86-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
14566 ; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
14567 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
14568 ; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
14569 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
14570 ; X86-NEXT: retl # encoding: [0xc3]
14572 ; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
14574 ; X64-NEXT: vpmovqd %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc2]
14575 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14576 ; X64-NEXT: vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
14577 ; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
14578 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
14579 ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
14580 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
14581 ; X64-NEXT: retq # encoding: [0xc3]
14582 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
14583 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
14584 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
14585 %res3 = add <4 x i32> %res0, %res1
14586 %res4 = add <4 x i32> %res3, %res2
14587 ret <4 x i32> %res4
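; The tests below exercise the masked compress/expand intrinsics
; (@llvm.x86.avx512.mask.compress.* and @llvm.x86.avx512.mask.expand.*) at 128-bit
; width. Each element type is checked in three forms: merge-masking into a passthru
; operand, zero-masking, and an all-ones mask with an undef passthru, which the
; CHECK lines expect to fold away to a bare return.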
14590 define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
14591 ; X86-LABEL: test_mask_compress_pd_128:
14593 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14594 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14595 ; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
14596 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14597 ; X86-NEXT: retl # encoding: [0xc3]
14599 ; X64-LABEL: test_mask_compress_pd_128:
14601 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14602 ; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
14603 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14604 ; X64-NEXT: retq # encoding: [0xc3]
14605 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
14606 ret <2 x double> %res
14609 define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
14610 ; X86-LABEL: test_maskz_compress_pd_128:
14612 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14613 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14614 ; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
14615 ; X86-NEXT: retl # encoding: [0xc3]
14617 ; X64-LABEL: test_maskz_compress_pd_128:
14619 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14620 ; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
14621 ; X64-NEXT: retq # encoding: [0xc3]
14622 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
14623 ret <2 x double> %res
14626 define <2 x double> @test_compress_pd_128(<2 x double> %data) {
14627 ; CHECK-LABEL: test_compress_pd_128:
14629 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14630 %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
14631 ret <2 x double> %res
14634 declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
14636 define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
14637 ; X86-LABEL: test_mask_compress_ps_128:
14639 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14640 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14641 ; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
14642 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14643 ; X86-NEXT: retl # encoding: [0xc3]
14645 ; X64-LABEL: test_mask_compress_ps_128:
14647 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14648 ; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
14649 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14650 ; X64-NEXT: retq # encoding: [0xc3]
14651 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
14652 ret <4 x float> %res
14655 define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
14656 ; X86-LABEL: test_maskz_compress_ps_128:
14658 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14659 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14660 ; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
14661 ; X86-NEXT: retl # encoding: [0xc3]
14663 ; X64-LABEL: test_maskz_compress_ps_128:
14665 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14666 ; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
14667 ; X64-NEXT: retq # encoding: [0xc3]
14668 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
14669 ret <4 x float> %res
14672 define <4 x float> @test_compress_ps_128(<4 x float> %data) {
14673 ; CHECK-LABEL: test_compress_ps_128:
14675 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14676 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
14677 ret <4 x float> %res
14680 declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
14682 define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
14683 ; X86-LABEL: test_mask_compress_q_128:
14685 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14686 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14687 ; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
14688 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14689 ; X86-NEXT: retl # encoding: [0xc3]
14691 ; X64-LABEL: test_mask_compress_q_128:
14693 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14694 ; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
14695 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14696 ; X64-NEXT: retq # encoding: [0xc3]
14697 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
14698 ret <2 x i64> %res
14701 define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
14702 ; X86-LABEL: test_maskz_compress_q_128:
14704 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14705 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14706 ; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
14707 ; X86-NEXT: retl # encoding: [0xc3]
14709 ; X64-LABEL: test_maskz_compress_q_128:
14711 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14712 ; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
14713 ; X64-NEXT: retq # encoding: [0xc3]
14714 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
14715 ret <2 x i64> %res
14718 define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
14719 ; CHECK-LABEL: test_compress_q_128:
14721 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14722 %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
14723 ret <2 x i64> %res
14726 declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
14728 define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
14729 ; X86-LABEL: test_mask_compress_d_128:
14731 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14732 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14733 ; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
14734 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14735 ; X86-NEXT: retl # encoding: [0xc3]
14737 ; X64-LABEL: test_mask_compress_d_128:
14739 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14740 ; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
14741 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14742 ; X64-NEXT: retq # encoding: [0xc3]
14743 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
14744 ret <4 x i32> %res
14747 define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
14748 ; X86-LABEL: test_maskz_compress_d_128:
14750 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14751 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14752 ; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
14753 ; X86-NEXT: retl # encoding: [0xc3]
14755 ; X64-LABEL: test_maskz_compress_d_128:
14757 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14758 ; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
14759 ; X64-NEXT: retq # encoding: [0xc3]
14760 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
14761 ret <4 x i32> %res
14764 define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
14765 ; CHECK-LABEL: test_compress_d_128:
14767 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14768 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
14769 ret <4 x i32> %res
14772 declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
14774 define <2 x double> @test_expand_pd_128(<2 x double> %data) {
14775 ; CHECK-LABEL: test_expand_pd_128:
14777 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14778 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
14779 ret <2 x double> %res
14782 define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
14783 ; X86-LABEL: test_mask_expand_pd_128:
14785 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14786 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14787 ; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
14788 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14789 ; X86-NEXT: retl # encoding: [0xc3]
14791 ; X64-LABEL: test_mask_expand_pd_128:
14793 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14794 ; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
14795 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14796 ; X64-NEXT: retq # encoding: [0xc3]
14797 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
14798 ret <2 x double> %res
14801 define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
14802 ; X86-LABEL: test_maskz_expand_pd_128:
14804 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14805 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14806 ; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
14807 ; X86-NEXT: retl # encoding: [0xc3]
14809 ; X64-LABEL: test_maskz_expand_pd_128:
14811 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14812 ; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
14813 ; X64-NEXT: retq # encoding: [0xc3]
14814 %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
14815 ret <2 x double> %res
14818 declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)
14820 define <4 x float> @test_expand_ps_128(<4 x float> %data) {
14821 ; CHECK-LABEL: test_expand_ps_128:
14823 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14824 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
14825 ret <4 x float> %res
14828 define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
14829 ; X86-LABEL: test_mask_expand_ps_128:
14831 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14832 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14833 ; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
14834 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14835 ; X86-NEXT: retl # encoding: [0xc3]
14837 ; X64-LABEL: test_mask_expand_ps_128:
14839 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14840 ; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
14841 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14842 ; X64-NEXT: retq # encoding: [0xc3]
14843 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
14844 ret <4 x float> %res
14847 define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
14848 ; X86-LABEL: test_maskz_expand_ps_128:
14850 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14851 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14852 ; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
14853 ; X86-NEXT: retl # encoding: [0xc3]
14855 ; X64-LABEL: test_maskz_expand_ps_128:
14857 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14858 ; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
14859 ; X64-NEXT: retq # encoding: [0xc3]
14860 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
14861 ret <4 x float> %res
14864 declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
14866 define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
14867 ; CHECK-LABEL: test_expand_q_128:
14869 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14870 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
14871 ret <2 x i64> %res
14874 define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
14875 ; X86-LABEL: test_mask_expand_q_128:
14877 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14878 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14879 ; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
14880 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14881 ; X86-NEXT: retl # encoding: [0xc3]
14883 ; X64-LABEL: test_mask_expand_q_128:
14885 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14886 ; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
14887 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14888 ; X64-NEXT: retq # encoding: [0xc3]
14889 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
14890 ret <2 x i64> %res
14893 define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
14894 ; X86-LABEL: test_maskz_expand_q_128:
14896 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14897 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14898 ; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
14899 ; X86-NEXT: retl # encoding: [0xc3]
14901 ; X64-LABEL: test_maskz_expand_q_128:
14903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14904 ; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
14905 ; X64-NEXT: retq # encoding: [0xc3]
14906 %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
14907 ret <2 x i64> %res
14910 declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)
14912 define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
14913 ; CHECK-LABEL: test_expand_d_128:
14915 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14916 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
14917 ret <4 x i32> %res
14920 define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
14921 ; X86-LABEL: test_mask_expand_d_128:
14923 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14924 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14925 ; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
14926 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14927 ; X86-NEXT: retl # encoding: [0xc3]
14929 ; X64-LABEL: test_mask_expand_d_128:
14931 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14932 ; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
14933 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
14934 ; X64-NEXT: retq # encoding: [0xc3]
14935 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
14936 ret <4 x i32> %res
14939 define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
14940 ; X86-LABEL: test_maskz_expand_d_128:
14942 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14943 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14944 ; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
14945 ; X86-NEXT: retl # encoding: [0xc3]
14947 ; X64-LABEL: test_maskz_expand_d_128:
14949 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14950 ; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
14951 ; X64-NEXT: retq # encoding: [0xc3]
14952 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
14953 ret <4 x i32> %res
14956 declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
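; The 256-bit compress/expand variants follow, using the same three-way masking
; pattern (merge-masking, zero-masking, all-ones mask) as the 128-bit tests above.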
14958 define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
14959 ; X86-LABEL: test_mask_compress_pd_256:
14961 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14962 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14963 ; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
14964 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
14965 ; X86-NEXT: retl # encoding: [0xc3]
14967 ; X64-LABEL: test_mask_compress_pd_256:
14969 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14970 ; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
14971 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
14972 ; X64-NEXT: retq # encoding: [0xc3]
14973 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
14974 ret <4 x double> %res
14977 define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
14978 ; X86-LABEL: test_maskz_compress_pd_256:
14980 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14981 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14982 ; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
14983 ; X86-NEXT: retl # encoding: [0xc3]
14985 ; X64-LABEL: test_maskz_compress_pd_256:
14987 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14988 ; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
14989 ; X64-NEXT: retq # encoding: [0xc3]
14990 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
14991 ret <4 x double> %res
14994 define <4 x double> @test_compress_pd_256(<4 x double> %data) {
14995 ; CHECK-LABEL: test_compress_pd_256:
14997 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14998 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
14999 ret <4 x double> %res
15002 declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
15004 define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
15005 ; X86-LABEL: test_mask_compress_ps_256:
15007 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15008 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15009 ; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
15010 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15011 ; X86-NEXT: retl # encoding: [0xc3]
15013 ; X64-LABEL: test_mask_compress_ps_256:
15015 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15016 ; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
15017 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15018 ; X64-NEXT: retq # encoding: [0xc3]
15019 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
15020 ret <8 x float> %res
15023 define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
15024 ; X86-LABEL: test_maskz_compress_ps_256:
15026 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15027 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15028 ; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
15029 ; X86-NEXT: retl # encoding: [0xc3]
15031 ; X64-LABEL: test_maskz_compress_ps_256:
15033 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15034 ; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
15035 ; X64-NEXT: retq # encoding: [0xc3]
15036 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
15037 ret <8 x float> %res
15040 define <8 x float> @test_compress_ps_256(<8 x float> %data) {
15041 ; CHECK-LABEL: test_compress_ps_256:
15043 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15044 %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
15045 ret <8 x float> %res
15048 declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
15050 define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
15051 ; X86-LABEL: test_mask_compress_q_256:
15053 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15054 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15055 ; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
15056 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15057 ; X86-NEXT: retl # encoding: [0xc3]
15059 ; X64-LABEL: test_mask_compress_q_256:
15061 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15062 ; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
15063 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15064 ; X64-NEXT: retq # encoding: [0xc3]
15065 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
15066 ret <4 x i64> %res
15069 define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
15070 ; X86-LABEL: test_maskz_compress_q_256:
15072 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15073 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15074 ; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
15075 ; X86-NEXT: retl # encoding: [0xc3]
15077 ; X64-LABEL: test_maskz_compress_q_256:
15079 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15080 ; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
15081 ; X64-NEXT: retq # encoding: [0xc3]
15082 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
15083 ret <4 x i64> %res
15086 define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
15087 ; CHECK-LABEL: test_compress_q_256:
15089 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15090 %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
15091 ret <4 x i64> %res
15094 declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
15096 define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
15097 ; X86-LABEL: test_mask_compress_d_256:
15099 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15100 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15101 ; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
15102 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15103 ; X86-NEXT: retl # encoding: [0xc3]
15105 ; X64-LABEL: test_mask_compress_d_256:
15107 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15108 ; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
15109 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15110 ; X64-NEXT: retq # encoding: [0xc3]
15111 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
15112 ret <8 x i32> %res
15115 define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
15116 ; X86-LABEL: test_maskz_compress_d_256:
15118 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15119 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15120 ; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
15121 ; X86-NEXT: retl # encoding: [0xc3]
15123 ; X64-LABEL: test_maskz_compress_d_256:
15125 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15126 ; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
15127 ; X64-NEXT: retq # encoding: [0xc3]
15128 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
15129 ret <8 x i32> %res
15132 define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
15133 ; CHECK-LABEL: test_compress_d_256:
15135 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15136 %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
15137 ret <8 x i32> %res
15140 declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
15142 define <4 x double> @test_expand_pd_256(<4 x double> %data) {
15143 ; CHECK-LABEL: test_expand_pd_256:
15145 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15146 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
15147 ret <4 x double> %res
15150 define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
15151 ; X86-LABEL: test_mask_expand_pd_256:
15153 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15154 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15155 ; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
15156 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15157 ; X86-NEXT: retl # encoding: [0xc3]
15159 ; X64-LABEL: test_mask_expand_pd_256:
15161 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15162 ; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
15163 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15164 ; X64-NEXT: retq # encoding: [0xc3]
15165 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
15166 ret <4 x double> %res
15169 define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
15170 ; X86-LABEL: test_maskz_expand_pd_256:
15172 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15173 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15174 ; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
15175 ; X86-NEXT: retl # encoding: [0xc3]
15177 ; X64-LABEL: test_maskz_expand_pd_256:
15179 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15180 ; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
15181 ; X64-NEXT: retq # encoding: [0xc3]
15182 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
15183 ret <4 x double> %res
15186 declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
15188 define <8 x float> @test_expand_ps_256(<8 x float> %data) {
15189 ; CHECK-LABEL: test_expand_ps_256:
15191 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15192 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
15193 ret <8 x float> %res
15196 define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
15197 ; X86-LABEL: test_mask_expand_ps_256:
15199 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15200 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15201 ; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
15202 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15203 ; X86-NEXT: retl # encoding: [0xc3]
15205 ; X64-LABEL: test_mask_expand_ps_256:
15207 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15208 ; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
15209 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15210 ; X64-NEXT: retq # encoding: [0xc3]
15211 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
15212 ret <8 x float> %res
15215 define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
15216 ; X86-LABEL: test_maskz_expand_ps_256:
15218 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15219 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15220 ; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
15221 ; X86-NEXT: retl # encoding: [0xc3]
15223 ; X64-LABEL: test_maskz_expand_ps_256:
15225 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15226 ; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
15227 ; X64-NEXT: retq # encoding: [0xc3]
15228 %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
15229 ret <8 x float> %res
15232 declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
15234 define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
15235 ; CHECK-LABEL: test_expand_q_256:
15237 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15238 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
15239 ret <4 x i64> %res
15242 define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
15243 ; X86-LABEL: test_mask_expand_q_256:
15245 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15246 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15247 ; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
15248 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15249 ; X86-NEXT: retl # encoding: [0xc3]
15251 ; X64-LABEL: test_mask_expand_q_256:
15253 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15254 ; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
15255 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15256 ; X64-NEXT: retq # encoding: [0xc3]
15257 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
15258 ret <4 x i64> %res
15261 define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
15262 ; X86-LABEL: test_maskz_expand_q_256:
15264 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15265 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15266 ; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
15267 ; X86-NEXT: retl # encoding: [0xc3]
15269 ; X64-LABEL: test_maskz_expand_q_256:
15271 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15272 ; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
15273 ; X64-NEXT: retq # encoding: [0xc3]
15274 %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
15275 ret <4 x i64> %res
15278 declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
15280 define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
15281 ; CHECK-LABEL: test_expand_d_256:
15283 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
15284 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
15285 ret <8 x i32> %res
15288 define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
15289 ; X86-LABEL: test_mask_expand_d_256:
15291 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15292 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15293 ; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
15294 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15295 ; X86-NEXT: retl # encoding: [0xc3]
15297 ; X64-LABEL: test_mask_expand_d_256:
15299 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15300 ; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
15301 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
15302 ; X64-NEXT: retq # encoding: [0xc3]
15303 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
15304 ret <8 x i32> %res
15307 define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
15308 ; X86-LABEL: test_maskz_expand_d_256:
15310 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
15311 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
15312 ; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
15313 ; X86-NEXT: retl # encoding: [0xc3]
15315 ; X64-LABEL: test_maskz_expand_d_256:
15317 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
15318 ; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
15319 ; X64-NEXT: retq # encoding: [0xc3]
15320 %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
15321 ret <8 x i32> %res
15324 declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)