1 #/bin/emblua LUAPREFIX=|
5 | -----------------------------------
6 | function imm(bits) return "i"..(bits <= 32 and bits or 32) end
7 | function immv(bits) return imm(bits)..(bits > 8 and "_8" or "") end
8 | function IMM(bits) return "!I"..(bits <= 32 and bits or 32) end
9 | function IMMV(bits) return IMM(bits)..(bits > 8 and "_8" or "") end
10 | function pref16(bits) return bits == 16 and "66" or "" end
11 | function isa64(bits,x,y) return bits > 32 and (y or "x64") or x or "" end
12 | function greg(bits) return "GR"..bits end
13 | function gmem(bits) return "i"..bits.."*" end
14 | function r64bit(bits) return bits == 64 and 1 or 0 end
15 | function r16bit(bits) return bits > 8 and 1 or 0 end
16 | function rrm(rm,r,m) return rm == "rr" and r or m end
17 | function grm(rm,n) return rrm(rm,greg(n),gmem(n)) end
18 | ccs = {"O", "NO", "B", "AE", "E", "NE", "BE", "A", "S", "NS", "PE", "PO", "L", "GE", "LE", "G"}
19 | ccsflags = {"rflag_o", "rflag_o", "rflag_c", "rflag_c", "rflag_z", "rflag_z", "rflag_c rflag_z", "rflag_c rflag_z",
20 | "rflag_s", "rflag_s", "rflag_p", "rflag_p", "rflag_o", "rflag_s rflag_o", "rflag_s rflag_o rflag_z", "rflag_s rflag_o rflag_z"}
21 | --// local _ccalias={[2]="c",[2]="nae",[3]="nb",[3]="nc",[4]="z",[5]="nz",[6]="na",[7]="nbe",
22 | --// [10]="p",[11]="np",[12]="nge",[13]="nl",[14]="ng",[15]="nle"}
23 | local _RN = {[8] = "B", [16] = "H", [32] = "W", [64] = "D"}
24 | local _EAX = {[8] = "AL", [16] = "AX", [32] = "EAX", [64] = "RAX"}
25 | local _EDX = {[8] = "DL", [16] = "DX", [32] = "EDX", [64] = "RDX"}
26 | local RN = function(N) return _RN[N] end
27 | local MN = function(N) return "M"..N end
28 | local RNM = function(rm, N) return rrm(rm, RN(N), "M") end
29 | local RNMN = function(rm, N) return rrm(rm, RN(N), MN(N)) end
30 | local EAX = function(N) return _EAX[N] or "??" end
31 | local EDX = function(N) return _EDX[N] or "??" end
33 | for rm_,rm in ipairs{"rr","rm"} do
34 | local RM, ROMI = rrm(rm,"R","M"), rrm(rm, "o", "i")
35 | local RMEM, WMEM, RWMEM = rrm(rm,"","rmem"), rrm(rm,"","wmem"), rrm(rm,"","rwmem")
36 | for k,v in ipairs{8,16,32,64} do
37 | local M,R,I, R16,W = gmem(v),greg(v),imm(v),r16bit(v),r64bit(v)
38 | local RRM = rrm(rm,R,M)
39 | local ISA, P = isa64(v), pref16(v)
40 MOV_%(RN(v))%(RNM(rm,v))
41 {isa %ISA; ops %R dst/o, %RRM src/i; flags %RMEM;
42 pref %P; rex %rm %W dst src; coding !par(%(0x8a+R16)) !RRM%(RM)($dst,$src);
43 fold MOV_M%(RN(v)) MOV_%(RN(v))M}
44 | for l,w in ipairs{{"ADD",0, 1}, {"OR",1, 1}, {"AND",4, 1}, {"SUB",5, 0}, {"XOR",6, 1},
45 | {"ADC",2, 1, 1}, {"SBB",3, 0, 1}} do
46 %(w[1])_%(RNMN(rm,v))I //= ARITH%(v)_%(RM)I.%(w[2])
47 {isa %ISA; ops %RRM dst/%(ROMI), %I imm/i; flags wflags %RWMEM %(w[4] and "rflag_c" or "");
48 pref %P; rex %rm %W 0 dst; coding !par(%(0x80+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) %(IMMV(v))($imm);
49 fold %(w[1])_%(MN(v))I}
50 %(w[1])_%(RN(v))%(RNM(rm,v)) //= ARITH%(v)_R%(RM).%(w[2])
51 {isa %ISA; ops %R dst/io, %RRM src/i; flags wflags %RMEM %(w[4] and "rflag_c" or "");
52 pref %P; rex %rm %W dst src; coding !sub(%(w[2]))*8+!par(%(0x02+R16)) !RRM%(RM)($dst,$src);
53 fold %(w[1])_M%(RN(v)) %(w[1])_%(RN(v))M; commute %(w[3] > 0 and "dst<->src" or "")}
56 %(w[1])_M%(RN(v)) //: ARITH_M%(RN(v))
57 {isa %ISA; ops %M dst/i, %R src/i; flags wflags rwmem %(w[4] and "rflag_c" or "");
58 pref %P; rex rm %W src dst; coding !sub(%(w[2]))*8+!par(%(0x00+R16)) !RRMM($src,$dst)}
60 %(w[1])_%(EAX(v))_I //: ARITH_%(EAX(v))_I
61 {isa %ISA; ops %R {%(EAX(v))} dst/io, %I imm/i; flags wflags %(w[4] and "rflag_c" or "");
62 pref %P; rex rr %W 0 0; coding !sub(%(w[2]))*8+!par(%(0x04+R16)) %(IMMV(v))($imm))}
66 {isa %ISA; ops %RRM dst/i, %I imm/i; flags wflags %RMEM;
67 pref %P; rex %rm %W 0 dst; coding !par(%(0x80+R16)) !RRM%(RM)(!sub(7),$dst) %(IMMV(v))($imm);
69 CMP_%(RN(v))%(RNM(rm,v))
70 {isa %ISA; ops %R dst/i, %RRM src/i; flags wflags %RMEM;
71 pref %P; rex %rm %W dst src; coding !sub(7)*8+!par(%(0x02+R16)) !RRM%(RM)($dst,$src);
72 fold CMP_M%(RN(v)) CMP_%(RN(v))M}
74 {isa %ISA; ops %RRM dst/i, %I imm/i; flags wflags %RMEM;
75 pref %P; rex %rm %W 0 dst; coding !par(%(0xf6+R16)) !RRM%(RM)(!sub(0),$dst) %(IMM(v))($imm);
77 TEST_%(RNM(rm,v))%(RN(v))
78 {isa %ISA; ops %RRM dst/i, %R src/i; flags wflags %RMEM;
79 pref %P; rex %rm %W src dst; coding !par(%(0x84+R16)) !RRM%(RM)($src,$dst);
80 fold TEST_%(RN(v))M TEST_M%(RN(v)); commute dst<->src}
81 | for l,w in ipairs{{"INC",0,0xfe,"wflag_c"},{"DEC",1,0xfe,"wflag_c"},{"NOT",2,0xf6,"wflags"},{"NEG",3,0xf6,""}} do
82 %(w[1])_%(RNMN(rm,v)) //= UNARY%(v)_%(RM).%(w[2])
83 {isa %ISA; ops %RRM dst/%(ROMI); flags %(w[4]) %RWMEM;
84 pref %P; rex %rm %W 0 dst; coding !par(%(w[3]+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
85 fold %(w[1])_%(MN(v))}
87 | for l,w in ipairs{{"ROL",0},{"ROR",1},{"SHL",4},{"SHR",5},{"SAR",7}} do
88 %(w[1])_%(RNMN(rm,v))I //= SHIFT%(v)_%(RM)I.%(w[2])
89 {isa %ISA; ops %RRM dst/%(ROMI), i8 sa/i; flags wflags %RWMEM;
90 pref %P; rex %rm %W 0 dst; coding !par(%(0xc0+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) !I8_1($sa);
91 fold %(w[1])_%(MN(v))I}
92 %(w[1])_%(RNMN(rm,v))_CL //= SHIFT%(v)_%(RM)_CL.%(w[2])
93 {isa %ISA; ops %RRM dst/%(ROMI), GR8{cl} sa/i; flags wflags %RWMEM;
94 pref %P; rex %rm %W 0 dst; coding !par(%(0xd2+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
95 fold %(w[1])_%(MN(v))_CL}
97 | for l,w in ipairs{{"RCL",2},{"RCR",3}} do
98 %(w[1])_%(RNMN(rm,v))I //= SHIFTC%(v)_%(RM)I.%(w[2])
99 {isa %ISA; ops %RRM dst/%(ROMI), i8 sa/i; flags wflags %RWMEM rflag_c;
100 pref %P; rex %rm %W 0 dst; coding !par(%(0xc0+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) !I8_1($sa);
101 fold %(w[1])_%(MN(v))I}
102 %(w[1])_%(RNMN(rm,v))_CL //= SHIFTC%(v)_%(RM)_CL.%(w[2])
103 {isa %ISA; ops %RRM dst/%(ROMI), GR8{cl} sa/i; flags wflags %RWMEM rflag_c;
104 pref %P; rex %rm %W 0 dst; coding !par(%(0xd2+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
105 fold %(w[1])_%(MN(v))_CL}
108 {isa %ISA sse42; ops GR32 dst/io, %RRM src/i; flags %RMEM;
109 pref f2; rex %rm %W dst src; extopcode 0f38; coding !parsub(%(0xf0+R16)) !RRM%(RM)($dst,$src);
110 fold CRC32_W%(MN(v))}
111 CMPXCHG_%(RNM(rm,v))%(RN(v))_%(EAX(v))
112 {isa %ISA; ops %RRM dst/%(rrm(rm,"io","i")), %R src/io, %R{%(EAX(v))} cmp/i; flags %RMEM wflags;
113 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(0xb0+R16)) !RRM%(RM)($src,$dst);
114 fold CMPXCHG_M%(RN(v))_%(EAX(v))}
115 XADD_%(RNM(rm,v))%(RN(v))
116 {isa %ISA; ops %RRM src/i, %R dst/io; flags %RWMEM;
117 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xc0+R16)) !RRM%(RM)($dst,$src);
119 XCHG_%(RNM(rm,v))%(RN(v))
120 {isa %ISA; ops %RRM src/i, %R dst/io; flags %RWMEM;
121 pref %P; rex %rm %W dst src; coding !parsub(%(0x86+R16)) !RRM%(RM)($dst,$src);
122 fold XCHG_M%(RN(v)); commute dst<->src}
125 {isa %ISA; ops %M dst/i, %I imm/i; flags wmem;
126 pref %P; rex rm %W 0 dst; coding !par(%(0xc6+R16)) !RRMM(!sub(0),$dst) %(IMM(v))($imm)}
128 {isa %ISA; ops %M dst/i, %R src/i; flags wmem;
129 pref %P; rex rm %W src dst; coding !par(%(0x88+R16)) !RRMM($src,$dst)}
131 {isa %ISA; ops %M dst/i, %R src/i; flags wflags rmem;
132 pref %P; rex rm %W src dst; coding !sub(7)*8+!par(%(0x00+R16)) !RRMM($src,$dst)}
134 {isa %ISA; ops %R dst/i, %M src/i; flags wflags rmem;
135 pref %P; rex rm %W dst src; coding !par(%(0x84+R16)) !RRMM($dst,$src)}
138 {isa %ISA; ops %R dst/o, i%v imm/i;
139 pref %P; rex rr %W 0 dst; coding !par(%(0xb0+R16*8))+($dst&7) !I%v($imm);
140 fold %(v < 64 and ("MOV_M"..v.."I"))}
142 {isa %ISA; ops %R {%(EAX(v))} dst/i, %I imm/i; flags wflags;
143 pref %P; rex rr %W 0 0; coding !sub(7)*8+!par(%(0x04+R16)) %(IMMV(v))($imm))}
148 | for l,w in ipairs{{"SHLD",0xa4},{"SHRD",0xac}} do
149 %(w[1])_%(RNM(rm,v))%(RN(v))I //= SHD_%(RNM(rm,v))%(RN(v))I.%(w[2]%16)}
150 {isa %ISA; ops %RRM dst/%(ROMI), %R src/i, i8 sa/i; flags wflags %RWMEM;
151 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2])) !RRM%(RM)($src,$dst) !I8($sa);
152 fold %(w[1])_M%(RN(v))I}
153 %(w[1])_%(RNM(rm,v))%(RN(v))_CL //= SHD%(v)_%(RNM(rm,v))%(RN(v))_CL.%(w[2]%16)
154 {isa %ISA; ops %RRM dst/%(ROMI), %R src/i, GR8{cl} sa/i; flags wflags %RWMEM;
155 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2]+1)) !RRM%(RM)($src,$dst);
156 fold %(w[1])_M%(RN(v))_CL}
158 | local RAX, RDX = EAX(v), EDX(v)
159 | for l,w in ipairs{{"",6},{"I",7}} do
160 %(w[1])DIV_%(RNMN(rm,v)) //= DIVREM_%(RNMN(rm,v)).%(w[2])
161 {isa %ISA; ops %R{%RAX} res/io, %R{%RDX} rem/io, %RRM src/i; flags wflags %RMEM;
162 pref %P; rex %rm %W 0 src; coding !par(0xf7) !RRM%(RM)(!sub(%(w[2])),$src);
163 fold %(w[1])DIV_%(MN(v))}
164 %(w[1])MUL_%(RNMN(rm,v)) //= XMUL_%(RNMN(rm,v)).%(w[2]-2))
165 {isa %ISA; ops %R{%RAX} lo/io, %R{%RDX} hi/o, %RRM src/i; flags wflags %RMEM;
166 pref %P; rex %rm %W 0 src; coding !par(0xf7) !RRM%(RM)(!sub(%(w[2]-2)),$src);
167 fold %(w[1])MUL_%(MN(v)); commute lo<->src}
169 IMUL_%(RN(v))%(RNM(rm,v))
170 {isa %ISA; ops %R dst/io, %RRM src/i; flags wflags %RMEM;
171 pref %P; rex %rm %W dst src; coding !par(0xaf) !RRM%(RM)($dst,$src);
172 fold IMUL_%(RN(v))RM; commute dst<->src}
173 IMUL_%(RN(v))%(RNM(rm,v))I
174 {isa %ISA; ops %R dst/o, %RRM src/i, %I imm/i; flags wflags %RMEM;
175 pref %P; rex %rm %W dst src; coding !par(0x69) !RRM%(RM)($dst,$src) %(IMMV(v))($imm);
176 fold IMUL_%(RN(v))MI}
177 BSF_%(RN(v))%(RNM(rm,v)) //= BS%(v)_%(RN(v))%(RNM(rm,v)).12
178 {isa %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
179 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbc) !RRM%(RM)($dst,$src);
181 BSR_%(RN(v))%(RNM(rm,v)) //= BS%(v)_%(RN(v))%(RNM(rm,v)).13
182 {isa %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
183 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbd) !RRM%(RM)($dst,$src);
185 BT_%(RNM(rm,v))%(RN(v))
186 {isa %ISA; ops %RRM dst/i, %R src/i; flags wflags %RMEM;
187 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(0xa3) !RRM%(RM)($src,$dst)}
189 {isa %ISA; ops %RRM dst/i, i8 imm/i; flags wflags %RMEM;
190 pref %P; rex %rm %W 0 dst; extopcode 0f; coding !par(0xba) !RRM%(RM)(!sub(4),$dst) !I8($imm)}
191 | for l,w in ipairs{{"S",0xab,5},{"R",0xb3,6},{"C",0xbb,7}} do
192 BT%(w[1])_%(RNM(rm,v))%(RN(v)) //= BTX_%(RNM(rm,v))%(RN(v)).11
193 {isa %ISA; ops %RRM dst/%(ROMI), %R src/i; flags wflags %RWMEM;
194 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2])) !RRM%(RM)($src,$dst)}
195 BT%(w[1])_%(RNMN(rm,v))I //= BTX_%(RNMN(rm,v))I.5 {
196 {isa %ISA; ops %RRM dst/%(ROMI), i8 imm/i; flags wflags %RWMEM;
197 pref %P; rex %rm %W 0 dst; extopcode 0f; coding !par(0xba) !RRM%(RM)(!sub(%(w[3])),$dst) !I8($imm)}
199 | for l,w in ipairs(ccs) do
200 CMOV%(w)_%(RN(v))%(RNM(rm,v)) //= CMOV_%(RN(v))%(RNM(rm,v)).%(l-1)
201 {isa cmov %ISA; ops %R dst/io, %RRM src/i; flags subflags %RMEM;
202 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(%(0x40+l-1)) !RRM%(RM)($dst,$src);
203 fold CMOV%(w)_%(RN(v))M}
207 {isa movbe %ISA; ops %R dst/o, %RRM src/i; flags rmem;
208 pref %P; rex rm %W dst src; extopcode 0f38; coding !parsub(0xf0) !RRMM($dst,$src)}
210 {isa movbe %ISA; ops %RRM dst/i, %R src/i; flags wmem;
211 pref %P; rex rm %W src dst; extopcode 0f38; coding !parsub(0xf1) !RRMM($src,$dst)}
213 XCHG_%(EAX(v))_%(RN(v))
214 {isa %ISA; ops %R dst/io, %R{%(EAX(v))} src/io;
215 pref %P; rex rr %W 0 dst; coding !parsub(0x90)+($dst&7)}
221 POPCNT_%(RN(v))%(RNM(rm,v))
222 {isa popcnt %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
223 pref f3; rex %rm %W dst src; extopcode 0f; coding !par(0xb8) !RRM%(RM)($dst,$src);
224 fold POPCNT_%(RN(v))M}
225 | for l,w in ipairs{8,16} do
226 MOVZX_%(RN(v))%(RNMN(rm,w)) //= MOVX_%(RN(v))%(RNMN(rm,w)).6
227 {isa %ISA; ops %R dst/o, %(grm(rm,w)) src/i; flags %RMEM;
228 rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xb6+R16)) !RRM%(RM)($dst,$src);
229 fold MOVZX_%(RN(v))%(MN(w))}
230 MOVSX_%(RN(v))%(RNMN(rm,w)) //= MOVX_%(RN(v))%(RNMN(rm,w)).14
231 {isa %ISA; ops %R dst/o, %(grm(rm,w)) src/i; flags %RMEM;
232 rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xbe+R16)) !RRM%(RM)($dst,$src);
233 fold MOVSX_%(RN(v))%(MN(w))}
235 ANDN_%(RN(v))%(RN(v))%(RNM(rm,v))
236 {isa bmi1 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags wflags %RMEM;
237 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf2) !RRM%(RM)($dst,$src);
238 fold ANDN_%(RN(v))%(RN(v))M}
239 BEXTR_%(RN(v))%(RNM(rm,v))%(RN(v))
240 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags wflags %RMEM;
241 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
242 fold BEXTR_%(RN(v))M%(RN(v))}
243 BLSI_%(RN(v))%(RNM(rm,v)) //= BMI1B_%(RN(v))%(RNM(rm,v)).3
244 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
245 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(3),$src);
247 BLSMSK_%(RN(v))%(RNM(rm,v)) //= BMI1B_%(RN(v))%(RNM(rm,v)).2
248 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
249 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(2),$src);
250 fold BLSMSK_%(RN(v))M}
251 BLSR_%(RN(v))%(RNM(rm,v)) //= BMI1B%(v)_%(RN(v))%(RNM(rm,v)).1
252 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
253 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(1),$src);
255 BZHI_%(RN(v))%(RNM(rm,v))%(RN(v))
256 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags wflags %RMEM;
257 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
258 fold BZHI_%(RN(v))M%(RN(v))}
259 LZCNT_%(RN(v))%(RNM(rm,v))
260 {isa lzcnt %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
261 pref f3 %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbd) !RRM%(RM)($dst,$src);
262 fold LZCNT_%(RN(v))M}
263 TZCNT_%(RN(v))%(RNM(rm,v))
264 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
265 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbc) !RRM%(RM)($dst,$src);
266 fold TZCNT_%(RN(v))M}
267 MULX_%(RN(v))%(RN(v))%(RNM(rm,v))
268 {isa bmi2 %ISA; ops %R dst/o, %R src1/o, %RRM src/i, %R{%(EDX(v))} src2/i; flags %RMEM;
269 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf6) !RRM%(RM)($dst,$src);
270 fold MULX_%(RN(v))%(RN(v))M; commute src1<->src2}
271 PDEP_%(RN(v))%(RN(v))%(RNM(rm,v))
272 {isa bmi2 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags %RMEM;
273 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
274 fold PDEP_%(RN(v))%(RN(v))M}
275 PEXT_%(RN(v))%(RN(v))%(RNM(rm,v))
276 {isa bmi2 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags %RMEM;
277 pref f3; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
278 fold PEXT_%(RN(v))%(RN(v))M}
279 RORX_%(RN(v))%(RNM(rm,v))I
280 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, i8 imm/i; flags %RMEM;
281 pref f2; vex %rm %W 0 dst src 0; extopcode 0f3a; coding !parsub(0xf0) !RRM%(RM)($dst,$src) !I8($imm);
282 fold RORX_%(RN(v))MI}
283 SARX_%(RN(v))%(RNM(rm,v))%(RN(v))
284 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
285 pref f3; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
286 fold SARX_%(RN(v))M%(RN(v))}
287 SHLX_%(RN(v))%(RNM(rm,v))%(RN(v))
288 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
289 pref 66; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
290 fold SHLX_%(RN(v))M%(RN(v))}
291 SHRX_%(RN(v))%(RNM(rm,v))%(RN(v))
292 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
293 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
294 fold SHRX_%(RN(v))M%(RN(v))}
297 {isa %ISA; ops %R dst/o, i8* src/i;
298 rex rm %W dst src; coding !par(0x8d) !RRMM($dst,$src)}
301 {isa %ISA; ops %R dst/io;
302 pref %P; rex rr %W 0 dst; extopcode 0f; coding !par(0xc8)+($dst&7)}
303 LEA_%(RN(v))%(RN(v))%(RN(v))II
304 {isa %ISA; ops %R dst/o, %R src1/i, %R src2/i, i8 mul2/i, i32 imm/i;
305 rexlea %W dst src1 src2; coding !par(0x8d) !RRMLEA($dst,$src1,$src2,$mul2,$imm)}
307 {isa rdrand %ISA; ops %R dst/o; flags wflags;
308 pref %P; rex rr %W 0 dst; extopcode 0f; coding !par(0xc7) !RRMR(!sub(6),$dst)}
310 {isa x64; ops %R dst/o;
311 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(0),$dst)}
313 {isa x64; ops %R dst/o;
314 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(1),$dst)}
316 {isa x64; ops %R dst/i;
317 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(2),$dst)}
319 {isa x64; ops %R dst/i;
320 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(3),$dst)}
324 MOVSX_D%(RNMN(rm,32))
325 {isa x64only; ops GR64 dst/o, %(grm(rm,32)) src/i; flags %RMEM;
326 rex %rm 1 dst src; coding !par(%(0x63)) !RRM%(RM)($dst,$src);
328 DIV_%(RNMN(rm,8)) //= DIVREM8_%(RM).6
329 {isa ; ops GR8{al} quot/o, GR8{ah} rem/o, GR16{ax} div/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
330 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(6),$src);
332 IDIV_%(RNMN(rm,8)) //= DIVREM8_%(RM).7
333 {isa ; ops GR8{al} quot/o, GR8{ah} rem/o, GR16{ax} div/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
334 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(7),$src);
336 MUL_%(RNMN(rm,8)) //= XMUL8_%(RM).4
337 {isa ; ops GR16{ax} pr/o, GR8{al} src2/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
338 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(4),$src);
339 fold MUL_M8; commute src<->src2}
340 IMUL_%(RNMN(rm,8)) //= XMUL8_%(RM).5
341 {isa ; ops GR16{ax} pr/o, GR8{al} src2/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
342 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(5),$src);
343 fold IMUL_M8; commute src<->src2}
344 | for l,w in ipairs(ccs) do
345 SET%(w)_%(RNMN(rm,8)) //= SET_%(RM).%(l-1)
346 {isa ; ops %(grm(rm,8)) dst/%(ROMI); flags subflags %WMEM;
347 rex %rm 0 0 dst; extopcode 0f; coding !parsub(%(0x90+l-1)) !RRM%(RM)(0,$dst);
350 IJMP_%(RNMN(rm,32))_x32 //= IJMP%(RM)_x32.4
351 {isa x32only; ops %(grm(rm,32)) src/i; flags cf_jmp %RMEM;
352 rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(4),$src);
354 IJMP_%(RNMN(rm,64))_x64 //= IJMP%(RM)_x64.4
355 {isa x64only; ops %(grm(rm,64)) src/i; flags cf_jmp %RMEM;
356 rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(4),$src);
358 ICALL_%(RNMN(rm,32))_x32 //= ICALL%(RM)_x32.2
359 {isa x32only; ops %(grm(rm,32)) src/i; flags cf_call %RMEM;
360 rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(2),$src);
362 ICALL_%(RNMN(rm,64))_x64 //= ICALL%(RM)_x64.2
363 {isa x64only; ops %(grm(rm,64)) src/i; flags cf_call %RMEM;
364 rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(2),$src);
368 | ------------------------------------
370 {isa x64; ops GR64 dst/o, i32 imm/i;
371 rex rr 1 0 dst; coding !par(0xc7) !RRMR(!sub(0),$dst) !I32($imm);
374 {isa x64; ops GR64 dst/o, i32 imm/i;
375 rex rr 0 0 dst; coding !par(0xb8)+($dst&7) !I32($imm)}
376 | ------------------------------------
377 INC_H_x32 //= INCDEC_H_x32.0
378 {isa x32only; ops GR16 dst/io; flags wflag_c;
379 pref 66; rex rr 0 0 dst; coding !parsub(0x40)+($dst&7);
381 DEC_H_x32 //= INCDEC_H_x32.8
382 {isa x32only; ops GR16 dst/io; flags wflag_c;
383 pref 66; rex rr 0 0 dst; coding !parsub(0x48)+($dst&7);
385 INC_W_x32 //= INCDEC_W_x32.0
386 {isa x32only; ops GR32 dst/io; flags wflag_c;
387 rex rr 0 0 dst; coding !parsub(0x40)+($dst&7);
389 DEC_W_x32 //= INCDEC_W_x32.8
390 {isa x32only; ops GR32 dst/io; flags wflag_c;
391 rex rr 0 0 dst; coding !parsub(0x48)+($dst&7);
393 CBW {ops GR16{ax} dst/o, GR8{al} src/i; pref 66; rex rr 0 0 0; coding !par(0x98)}
394 CWDE {ops GR32{eax} dst/o, GR16{ax} src/i; rex rr 0 0 0; coding !par(0x98)}
395 CDQE {ops GR64{rax} dst/o, GR32{eax} src/i; rex rr 1 0 0; coding !par(0x98); isa x64}
396 CWD {ops GR16{dx} dst/o, GR16{ax} src/i; pref 66; rex rr 0 0 0; coding !par(0x99)}
397 CDQ {ops GR32{edx} dst/o, GR32{eax} src/i; rex rr 0 0 0; coding !par(0x99)}
398 CQO {ops GR64{rdx} dst/o, GR64{rax} src/i; rex rr 1 0 0; coding !par(0x99); isa x64}
399 PAUSE {} {pref f3; coding !parsub(0x90)}
400 CLC {flags wflag_c; coding !parsub(0xf8)}
401 //CLD {flags wflag_d; coding !parsub(0xfc)}
402 //CLI {flags wflag_i; coding !parsub(0xfa)}
403 CMC {flags rflag_c wflag_c; coding !parsub(0xf5)}
404 STC {flags wflag_c; coding !parsub(0xf9)}
405 //STD {flags wflag_d; coding !parsub(0xfd)}
406 //STI {flags wflag_i; coding !parsub(0xfb)}
407 CLFLUSH {isa clflush; ops i8* src/i; flags rwmem; rex rm 0 0 src; extopcode 0f; coding !par(0xae) !RRMM(!sub(7),$src)}
408 LAHF {ops GR8{ah} dst/o; flags rflags; coding !parsub(0x9f)}
409 SAHF {ops GR8{ah} dst/i; flags wflags; coding !parsub(0x9e)}
410 LFENCE {isa sse2; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(5),0)}
411 MFENCE {isa sse2; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(6),0)}
412 SFENCE {isa sse; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(7),0)}
413 //MONITOR_RAX_ECX_EDX
414 // {isa monitor; ops GR32{eax} dst/i, GR32{ecx} ext/i, GR32{edx} hint/i; flags rwmem;
415 // extopcode 0f; coding 0x01 !parsub(0xc8)} //--0xc8
417 // {isa monitor; ops GR32{ecx} ext/i, GR32{eax} hint/i; flags rwmem;
418 // extopcode 0f; coding 0x01 !parsub(0xc9)} //--0xc9
420 LOCK {coding !parsub(0xf0)}
421 //RDPMC {isa rdpmc; ops GR32{edx} hi/o, GR32{eax} lo/o, GR32{ecx} sel/i; extopcode 0f; coding !parsub(0x33)}
422 //xacquire is ignored if hle is not present
423 //useable with lock_{add,adc,and,btc,btr,bts,cmpxchg,cmpxchg8b,dec,inc,neg,not,or,sbb,sub,xor,xadd,xchg}_mr,
424 //useable with xchg_mr
425 XACQUIRE {isa hle; coding !parsub(0xf2)}
426 //xrelease is ignored if hle is not present
427 //useable with lock_{add,adc,and,btc,btr,bts,cmpxchg,cmpxchg8b,dec,inc,neg,not,or,sbb,sub,xor,xadd,xchg}_mr,
428 //useable with xchg_mr, mov_mr, mov_mi
429 XRELEASE {isa hle; coding !parsub(0xf3)}
430 // resume operation at fallback address of outermost xbegin fallback address, imm is provided as EAX[31:24]
431 XABORT_I {isa rtm; ops i8 imm/i; rex rr 0 0 0; coding !par(0xc6) !RRMR(!sub(7),0) !I8($imm)}
432 XBEGIN_REL16 {isa rtm; ops BB fback/i; flags cf_jmp cf_fallthru; pref 66; coding !par(0xc7) !RRMR(!sub(7),0) !REL16($fback)}
433 XBEGIN_REL32 {isa rtm; ops BB fback/i; flags cf_jmp cf_fallthru; coding !par(0xc7) !RRMR(!sub(7),0) !REL32($fback)}
434 XEND {isa rtm; extopcode 0f; coding 0x01 !parsub(0xd5)}
435 XTEST {isa rtm; flags wflags; extopcode 0f; coding 0x01 !parsub(0xd6)}
438 {isa avx; ops GR32{edx} hi/o, GR32{eax} lo/o, GR32{ecx} sel/i; extopcode 0f; coding !par(1) !RRMR(!sub(2),0)}
441 {ops GR32{eax} r1/io, GR32{ebx} r2/o, GR32{ecx} r3/io, GR32{edx} r4/o; extopcode 0f; coding !parsub(0xa2)}
443 {isa rdtsc; ops GR32{eax} lo/o, GR32{edx} hi/o; extopcode 0f; coding !parsub(0x31)}
445 {isa rdtscp; ops GR32{eax} lo/o, GR32{edx} hi/o, GR32{ecx} aux/o; extopcode 0f; coding 0x01 !parsub(0xf9)}
447 | --------------------------
448 | for l,w in ipairs(ccs) do
449 J%(w)_BB_FT //= JCC_BB_FT.%(l-1)
450 {ops BB tgt/i, BB ft/i; flags cf_jmp cf_fallthru subflags;
451 extopcode 0f; coding !parsub(%(0x80+l-1)) !REL32_8_JCC($tgt) }
453 JMP_BB {ops BB tgt/i; flags cf_jmp; coding !par(0xe9) !REL32_8_JMP($tgt)}
454 JMP_FT {ops BB ft/i; flags cf_fallthru }
455 RET {ops GR32{esp} sp/io; flags cf_ret; coding !par(0xc3) }
456 RET_AMD {ops GR32{esp} sp/io; flags cf_ret; pref f3; coding !par(0xc3)}
457 RET_I {ops GR32{esp} sp/io, i16 imm/i; flags cf_ret; coding !par(0xc2) !I16($imm) }
458 PUSH_I {ops GR32{esp} sp/io, i32 imm/i; flags usemem; coding !par(0x68) !I32_8($imm) }
459 CALL_I_x32 {ops GR32{esp} sp/io, i32 tgt/i; flags cf_call; coding !par(0xe8) !REL32($tgt) }
460 CALL_I_x64 {ops GR32{esp} sp/io, i64 tgt/i; flags cf_call; coding !par(0xe8) !REL32($tgt) }
462 {isa x32only; ops GR32{esp} sp/io, GR32 dst/i; flags usemem;
463 rex rr 0 0 dst; coding !par(0x50)+($dst&7);
466 {isa x32only; ops GR32{esp} sp/io, GR32 dst/o; flags usemem;
467 rex rr 0 0 dst; coding !par(0x58)+($dst&7);
470 {isa x32only; ops GR32{esp} sp/io, i32* dst/i; flags usemem rmem;
471 rex rm 0 0 dst; coding !par(0xff) !RRMM(!sub(6),$dst) }
473 {isa x32only; ops GR32{esp} sp/io, i32* dst/i; flags usemem wmem;
474 rex rm 0 0 dst; coding !par(0x8f) !RRMM(!sub(0),$dst) }
476 {isa x64only; ops GR32{esp} sp/io, GR64 dst/i; flags usemem;
477 rex rr 0 0 dst; coding !par(0x50)+($dst&7);
480 {isa x64only; ops GR32{esp} sp/io, GR64 dst/o; flags usemem;
481 rex rr 0 0 dst; coding !par(0x58)+($dst&7);
484 {isa x64only; ops GR32{esp} sp/io, i64* dst/i; flags usemem rmem;
485 rex rm 0 0 dst; coding !par(0xff) !RRMM(!sub(6),$dst) }
487 {isa x64only; ops GR32{esp} sp/io, i32* dst/i; flags usemem wmem;
488 rex rm 0 0 dst; coding !par(0x8f) !RRMM(!sub(0),$dst) }
489 | --EMMS {} { parm 0x77; coding !par() : pref 0x0f }{ mmx }
491 {ops GR32{esp} sp/io; flags rflags usemem;
492 coding !parsub(0x9c)}
494 {ops GR32{esp} sp/io; flags wflags usemem;
495 coding !parsub(0x9d)}
497 CMPXCHG8B_M64_EDX_EAX_ECX_EBX
498 {isa ; ops i64* dst/i, GR32{eax} v1lo/io, GR32{edx} v1hi/io, GR32{ebx} v2lo/i, GR32{ecx} v2hi/i; flags wflags, wmem;
499 pref ; rex rm 0 0 dst; extopcode 0f; coding !par(0xc7) !RRMM(!sub(1),$dst)}
500 CMPXCHG16B_M128_RDX_RAX_RCX_RBX
501 {isa x64; ops i64* dst/i, GR64{rax} v1lo/io, GR64{rdx} v1hi/io, GR64{rbx} v2lo/i, GR64{rcx} v2hi/i; flags wflags, wmem;
502 pref ; rex rm 1 0 dst; extopcode 0f; coding !par(0xc7) !RRMM(!sub(1),$dst)}
503 | ------------------------------------
504 | function SDBITS(SD) return SD == "S" and 32 or 64 end
505 | function vreg(x,n) return "VR"..(n >= 128 and n or 128) end
506 | function vmem(x,n) return gmem(n) end
507 | function vmems(x,n) return vmem(x,SDBITS(x)) end
508 | function vrm(rm,x,n) return rrm(rm,vreg(x,n),vmem(x,n)) end
509 | function vrms(rm,x,n) return rrm(rm,vreg(x,n),vmems(x,n)) end
510 | local _XN = {[8] = "X", [16] = "X", [32] = "X", [64] = "X", [128] = "X", [256] = "Y", [512] = "Z"}
511 | local function V(w) return w[1] == "avx" and "V" or "" end
512 | local function XN(w) return _XN[w[2]] or "?" end
513 | local function XNH(w,h) return _XN[h and w[2]/2 or w[2]]end
514 | local function XNM(w) return rrm(w[3],XN(w),"M") end
515 | local function MH(w,h,f) return (h and ("M"..(w[2]/2))) or (f and "M"..w[2]) or "M" end
516 | local function XNMH(w,h,f) return rrm(w[3],XNH(w,h),MH(w,h,f)) end
517 | local function XNMN(w,h) return rrm(w[3],XN(w),(h and "M"..w[2] or "M")) end
518 | local function SN(SD) return "M"..SDBITS(SD) end
519 | local function XSN(w,SD) return rrm(w[3],"X",SN(SD)) end
520 | local function XNSN(w,SD) return rrm(w[3],XN(w),SN(SD)) end
521 | local function X(w) return w[1] == "avx" and XN(w) or "" end
522 | local function XM(w) return rrm(w[3],"X",w[2] == 128 and "M" or "M128") end
523 | local function XMN(w,n) return rrm(w[3],"X",w[2] == n and "M" or MN(n)) end
524 | local function YM(w) return rrm(w[3],"Y",w[2] == 256 and "M" or "M256") end
525 | local function DST(w, T) return w[1] == "avx" and (T.." dst/o, "..T.." src1/i") or (T.." dst/io") end
527 | function s_66(x) return x == "S" and "" or "66" end
528 | function s_f3(x) return x == "S" and "" or "f3" end
529 | function s66f2(x) return x == "S" and "66" or "f2" end
530 | function sf366(x) return x == "S" and "f3" or "66" end
531 | function sf3f2(x) return x == "S" and "f3" or "f2" end
532 | function s5be6(x) return x == "S" and "0x5b" or "0xe6" end
533 | function dupmem(x,n) return x == "D" and n == 128 and vmems(x,n) or vmem(x,n) end
534 | ------------------------------------
535 | sse = {{"ADD",0x58,1}, {"MUL",0x59,1}, {"MIN",0x5d,1}, {"MAX",0x5f,1}, {"SUB",0x5c,0}, {"DIV",0x5e,0}}
536 | ssea = {{"SQRT",0x51}}
537 | ssec = {{"RSQRT",0x52,"S"}, {"RCP",0x53,"S"}}
538 | sseb = {{"AND",0x54, 1}, {"ANDN",0x55, 0}, {"OR",0x56, 1}, {"XOR",0x57, 0}, {"UNPCKL",0x14,0}, {"UNPCKH",0x15,0},
539 | {"ADDSUB",0xd0,0,"sse3",{S="0xf2",D="0x66"}}, {"HADD",0x7c,0,"sse3",{S="0xf2",D="0x66"}}, {"HSUB",0x7d,0,"sse3",{S="0xf2",D="0x66"}},}
540 | ssecmp = {{"EQ",0}, {"LT",1}, {"LE",2}, {"UNORD",3}, {"NEQ",4}, {"NLT",5}, {"NLE",6}, {"ORD",7} }
541 | ssefma = {{"MADD",0x8,true},{"MSUB",0xa,true},{"NMADD",0xc,true},{"NMSUB",0xe,true},
542 | {"MADDSUB",6,false},{"MSUBADD",7,false}}
543 | function VEX(w,wbit,src1,reg,rm)
544 | return (w[1]=="avx" and "vex" or "rex").." "..w[3].." "
545 | ..wbit.." "..(w[1]=="avx" and src1 or "").." "..reg.." "..rm
546 | ..(w[1]=="avx" and (" "..(w[2] == 256 and 1 or 0)) or "")
548 | local function wbit(W) local t = {S=0,D=1}; return t[W] or W or 0 end
549 | function VEX_D0S(w,W) return VEX(w,wbit(W),"0","dst","src") end
550 | function VEX_S0D(w,W) return VEX(w,wbit(W),"0","src","dst") end
551 | function VEX_DS(w,W) return VEX(w,wbit(W),"src1","dst","src") end
552 | function VEX_SD(w,W) return VEX(w,wbit(W),"src1","src","dst") end
553 | function VEX_0D(w,W) return VEX(w,wbit(W),"src1",0,"dst") end
554 | for k,SD in ipairs{"S","D"} do
555 | local D = SD == "D" and 1 or 0
556 | local PP, PS = s_66(SD), sf3f2(SD)
557 | for l,w in ipairs{{"sse",128,"rr"},{"sse",128,"rm"},
558 | {"avx",128,"rr"},{"avx",128,"rm"},{"avx",256,"rr"},{"avx",256,"rm"}} do
559 | local VR, VM, SM = vreg(SD,w[2]), vmem(SD,w[2]), vmems(SD)
560 | local RM, ROMI = rrm(w[3],"R","M"), rrm(w[3], "o", "i")
561 | local VRM, SRM = rrm(w[3],VR,VM), rrm(w[3],VR,SM)
562 | local RMEM, WMEM = rrm(w[3],"","rmem"), rrm(w[3],"","wmem")
563 | local COMMUTE = (w[1]=="avx" and "src1" or "dst").."<->src"
564 | local DS = SD == "D" and "S" or "D"
565 | local ISA = (w[1]=="avx" and w[1]) or (d == "D" and "sse2" or "sse")
566 | for m,u in ipairs{{"A",0x28},{"U",0x10}} do
567 | local PM, AU, PAR = PP, u[1], u[2]
568 | if w[3] == "rm" then
569 %(V(w))MOV%(AU)P%(SD)_M%(XN(w))
570 {isa %ISA; ops %VM dst/i, %VR src/i; flags wmem;
571 pref %PM; %(VEX_S0D(w)); extopcode 0f; coding !par(%(PAR+1)) !RRMM($src,$dst)}
573 %(V(w))MOV%(AU)P%(SD)_%(XN(w))%(XNM(w))
574 {isa %ISA; ops %VR dst/i, %VRM src/i; flags wmem;
575 pref %PM; %(VEX_D0S(w)); extopcode 0f; coding !par(%(PAR)) !RRM%(RM)($dst,$src);
576 fold %(V(w))MOV%(AU)P%(SD)_M%(XN(w)) %(V(w))MOV%(AU)P%(SD)_%(XN(w))M}
578 | for m,u in ipairs(sse) do
579 %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w))
580 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
581 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
582 fold %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
584 | for m,u in ipairs(ssea) do
585 %(V(w))%(u[1])P%(SD)_%(XN(w))%(XNM(w))
586 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
587 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
588 fold %(V(w))%(u[1])P%(SD)_%(XN(w))M}
590 | for m,u in ipairs(sseb) do
591 %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w))
592 {isa %ISA %(u[4] or ""); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
593 pref %(u[5] and u[5][v] or PP); %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
594 fold %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
596 | for m,u in ipairs(ssecmp) do
597 %(V(w))CMP%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w)) //= %(V(w))SIMDCMPP%(SD)_%(XN(w))%(X(w))%(XNM(w)).%(u[2])
598 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
599 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !par(0xc2) !RRM%(RM)($dst,$src) !I8(!sub(%(u[2])));
600 fold %(V(w))CMP%(u[1])P%(SD)_%(XN(w))%(X(w))M}
602 %(V(w))SHUFP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
603 {isa %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
604 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !par(0xc6) !RRM%(RM)($dst,$src) !I8($imm);
605 fold %(V(w))SHUFP%(SD)_%(XN(w))%(X(w))MI}
606 %(V(w))ROUNDP%(SD)_%(XN(w))%(XNM(w))I
607 {isa %ISA sse41; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
608 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(%(0x08+D)) !RRM%(RM)($dst,$src) !I8($imm);
609 fold %(V(w))ROUNDP%(SD)_%(XN(w))MI}
610 %(V(w))DPP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
611 {isa %ISA sse41; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
612 pref %PP; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x40+D)) !RRM%(RM)($dst,$src) !I8($imm);
613 fold %(V(w))DPP%(SD)_%(XN(w))%(X(w))MI; commute %COMMUTE}
614 %(V(w))BLENDP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
615 {isa %ISA sse41; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
616 pref %PP; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x0c+D)) !RRM%(RM)($dst,$src) !I8($imm);
617 fold %(V(w))BLENDP%(SD)_%(XN(w))%(X(w))MI}
618 | local VRX1, VRMX1 = vreg(SD,w[2]/(2-D)), vrm(w[3],SD,w[2]/(1+D))
619 | local VRX2, VRMX2 = vreg(SD,w[2]/(1+D)), vrm(w[3],SD,w[2]/(2-D))
620 %(V(w))CVTDQ2P%(SD)_%(XNH(w))%(XNMH(w,SD=="D"))
621 {isa %ISA sse2; ops %VR dst/o, %VRMX1 src/i; flags %RMEM;
622 pref %(s_f3(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
623 fold %(V(w))CVTDQ2P%(SD)_%(XNH(w))%(MH(w,SD=="D"))}
624 %(V(w))CVTP%(SD)2DQ_%(XNH(w,SD=="D"))%(XNMN(w,SD=="D" and w[2]>128))
625 {isa %ISA sse2; ops %VRX2 dst/o, %VRM src/i; flags %RMEM;
626 pref %(s66f2(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
627 fold %(V(w))CVTP%(SD)2DQ_%(XNH(w,SD=="D"))%(SD=="D" and w[2]>128 and MN(w[2]) or "M")}
628 %(V(w))CVTTP%(SD)2DQ_%(XNH(w,SD=="D"))%(XNMN(w,SD=="D" and w[2]>128))
629 {isa %ISA sse2; ops %VRX2 dst/o, %VRM src/i; flags %RMEM;
630 pref %(sf366(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
631 fold %(V(w))CVTTP%(SD)2DQ_%(XNH(w,SD=="D"))%(SD=="D" and w[2]>128 and MN(w[2]) or "M")}
632 %(V(w))CVTP%(SD)2P%(DS)_%(XNH(w,SD=="D"))%(XNMH(w,SD=="S",SD=="D" and w[2]>128))
633 {isa %ISA sse2; ops %VRX2 dst/o, %VRMX2 src/i; flags %RMEM;
634 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x5a) !RRM%(RM)($dst,$src);
635 fold %(V(w))CVTP%(SD)2P%(DS)_%(XNH(w,SD=="D"))%(MH(w,SD=="S",SD=="D" and w[2]>128))}
636 | if w[3] == "rr" then
637 %(V(w))MOVMSKP%(SD)_W%(XN(w))
638 {isa %ISA; ops GR32 dst/o, %VR src/i;
639 pref %(s_66(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x50) !RRM%(RM)($dst,$src)}
641 | if w[1] == "avx" then
642 VTESTP%(SD)_%(XN(w))%(XNM(w))
643 {isa avx; ops %VR dst/i, %VRM src/i; flags %RMEM wflags;
644 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(0x0e+D)) !RRM%(RM)($dst,$src);
645 fold VTESTP%(SD)_%(XN(w))M; commute dst<->src}
646 | if w[3] == "rm" then
647 VGATHERDP%(SD)_%(XN(w))M%(XNH(w,SD=="D"))%(XN(w)) //= %(V(w))GATHERP%(SD)_%(XN(w))M%(XN(w)).2
648 {isa avx2; ops %VR dst/o, %VM src/i, %VR ix/i, %VR src1/i; flags rmem vsib;
649 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(0x92) !RRMMVSIB($dst,$src,$ix)}
650 VGATHERQP%(SD)_%(XNH(w,SD=="S"))M%(XN(w))%(XNH(w,SD=="S")) //= %(V(w))GATHERP%(SD)_%(XN(w))M%(XN(w)).3
651 {isa avx2; ops %VR dst/o, %VM src/i, %VR ix/i, %VR src1/i; flags rmem vsib;
652 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(0x93) !RRMMVSIB($dst,$src,$ix)}
654 | if SD == "S" or w[2] == 256 then
655 VBROADCASTS%(SD)_%(XN(w))%(XSN(w,SD))
656 {isa %(rrm(w[3],"avx2","avx")); ops %VR dst/o, %(vrm(w[3],SD,SDBITS(SD))) src/i; flags %RMEM;
657 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(0x18+D)) !RRM%(RM)($dst,$src)}
660 VCVTPH2PS_%(XN(w))%(XNMH(w,true))
661 {isa f16c; ops %VR dst/o, %(vrm(w[3],"I",w[2]/2)) src/i; flags %RMEM;
662 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x13) !RRM%(RM)($dst,$src);
663 fold VCVTPH2PS_%(XN(w))%(MH(w,true))}
664 VCVTPS2PH_%(XNMH(w,true))%(XN(w))
665 {isa f16c; ops %(vrm(w[3],"I",w[2]/2)) dst/%(ROMI), %VR src/i, i8 imm/i; flags %RMEM;
666 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x1d) !RRM%(RM)($src,$dst) !I8($imm);
667 fold VCVTPS2PH_%(MH(w,true))%(XN(w))}
669 VBLENDVP%(SD)_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)) //= VSIMD_AVX_660F3A_40_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)).%(0x0a+D)
670 {isa avx; ops %VR dst/o, %VR src1/i, %VRM src/i, %VR msk/i; flags %RMEM;
671 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x4a+D)) !RRM%(RM)($dst,$src) ($msk<<4);
672 fold VBLENDVP%(SD)_%(XN(w))%(XN(w))M%(XN(w))}
673 VPERMILP%(SD)_%(XN(w))%(XN(w))%(XNM(w)) //= VSIMD_AVX_660F38_00_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)).%(0x0c+D)
674 {isa avx; ops %VR dst/o, %VR src1/i, %VRM src/i; flags %RMEM;
675 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(0x0c+D)) !RRM%(RM)($dst,$src);
676 fold VPERMILP%(SD)_%(XN(w))%(XN(w))M}
677 VPERMILP%(SD)_%(XN(w))%(XNM(w))I //= VSIMD_AVX_660F3A_00_%(XN(w))%(XNM(w))I.%(0x04+D)
678 {isa avx; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
679 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(0x04+D)) !RRM%(RM)($dst,$src) !I8($imm);
680 fold VPERMILP%(SD)_%(XN(w))MI}
681 | for m,u in ipairs(ssefma) do
682 | for n,x in ipairs{{"132",0x90},{"213",0xa0},{"231",0xb0}} do
683 VF%(u[1])%(x[1])P%(SD)_%(XN(w))%(XN(w))%(XNM(w))
684 {isa fma3; ops %VR dst/io, %VR src1/i, %VRM src/i; flags %RMEM;
685 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(%(x[2]+u[2])) !RRM%(RM)($dst,$src);
686 fold VF%(u[1])%(x[1])P%(SD)_%(XN(w))%(XN(w))M}
688 VF%(u[1])%(x[1])S%(SD)_%(XN(w))%(XN(w))%(XNSN(w,SD))
689 {isa fma3; ops %VR dst/io, %VR src1/i, %SRM src/i; flags %RMEM;
690 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(%(x[2]+u[2]+1)) !RRM%(RM)($dst,$src)}
695 BLENDVP%(SD)_X%(XM(w))_XMM0 //= SIMD_SSE41_660F38_10_X%(XM(w))_XMM0.%(0x04+D)
696 {isa sse41; ops %VR dst/io, %VRM src/i, %VR{xmm0} msk/i; flags %RMEM;
697 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(0x14+D)) !RRM%(RM)($dst,$src);
698 fold BLENDVP%(SD)_XM_XMM0}
700 | if w[2] == 128 then
701 | if w[3] == "rr" then
702 %(V(w))MOVS%(SD)_X%(X(w))X
703 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
704 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x10) !RRM%(RM)($dst,$src)}
706 %(V(w))MOVHLPS_X%(X(w))X
707 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
708 %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
709 %(V(w))MOVLHPS_X%(X(w))X
710 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
711 %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x16) !RRM%(RM)($dst,$src)}
715 {isa %ISA; ops %(vmems(SD)) dst/i, %VR src/i; flags %WMEM;
716 pref %PS; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x11) !RRM%(RM)($src,$dst)}
718 {isa %ISA; ops %VR dst/o, %SM src/i; flags %RMEM;
719 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x10) !RRM%(RM)($dst,$src)}
720 %(V(w))MOVLP%(SD)_X%(X(w))M //= %(V(w))MOVLORHP%(SD)_RM.2
721 {isa %ISA; ops %(DST(w,VR)), %(vmem(SD,64)) src/i; flags %RMEM;
722 pref %(s_66(SD)); %(VEX_DS(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
723 %(V(w))MOVHP%(SD)_X%(X(w))M //= %(V(w))MOVLORHP%(SD)_RM.6
724 {isa %ISA; ops %(DST(w,VR)), %(vmem(SD,64)) src/i; flags %RMEM;
725 pref %(s_66(SD)); %(VEX_DS(w)); extopcode 0f; coding !parsub(0x16) !RRM%(RM)($dst,$src)}
726 %(V(w))MOVLP%(SD)_MX //= %(V(w))MOVLORHP%(SD)_MR.3
727 {isa %ISA; ops %(vmem(SD,64)) dst/i, %VR src/i; flags %WMEM;
728 pref %(s_66(SD)); %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x13) !RRM%(RM)($src,$dst)}
729 %(V(w))MOVHP%(SD)_MX //= %(V(w))MOVLORHP%(SD)_MR.7
730 {isa %ISA; ops %(vmem(SD,64)) dst/i, %VR src/i; flags %WMEM;
731 pref %(s_66(SD)); %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x17) !RRM%(RM)($src,$dst)}
734 | for m,u in ipairs(ssec) do
735 %(V(w))%(u[1])SS_X%(X(w))%(XNSN(w,SD))
736 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
737 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
739 %(V(w))INSERTPS_X%(X(w))%(XNSN(w,SD))I
740 {isa %ISA sse41; ops %(DST(w,VR)), %SRM src/i, i8 imm/i; flags %RMEM;
741 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x21) !RRM%(RM)($dst,$src) !I8($imm)}
742 %(V(w))EXTRACTPS_%(RNMN(w[3],32))XI
743 {isa %ISA sse41; ops %SRM dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
744 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x17) !RRM%(RM)($src,$dst) !I8($imm)}
746 | for m,u in ipairs(sse) do
747 %(V(w))%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD))
748 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
749 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)
751 | for m,u in ipairs(ssea) do
752 %(V(w))%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD))
753 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
754 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)
756 %(V(w))ROUNDS%(SD)_X%(X(w))%(XNSN(w,SD))I
757 {isa %ISA sse41; ops %(DST(w,VR)), %SRM src/i, i8 imm/i; flags %RMEM;
758 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x0a+D)) !RRM%(RM)($dst,$src) !I8($imm)}
759 | for m,u in ipairs(ssecmp) do
760 %(V(w))CMP%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD)) //= %(V(w))SIMDCMPS%(SD)_X%(X(w))%(XNSN(w,SD)).%(u[2])
761 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
762 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !par(0xc2) !RRM%(RM)($dst,$src) !I8(!sub(%(u[2])))}
764 | for m,u in ipairs{{"",0x2f},{"U",0x2e}} do
765 %(V(w))%(u[1])COMIS%(SD)_X%(XNSN(w,SD)) //= %(V(w))SIMDCOMIS%(SD)_X%(XNSN(w,SD)).%(u[2]%16)
766 {isa %ISA; ops %VR dst/i, %SRM src/i; flags %RMEM wflags;
767 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
769 %(V(w))CVTSI2S%(SD)_X%(X(w))%(RNMN(w[3],32))
770 {isa sse2 %ISA; ops %(DST(w,VR)), %(grm(w[3],32)) src/i; flags %RMEM;
771 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
772 %(V(w))CVTSI2S%(SD)_X%(X(w))%(RNMN(w[3],64))
773 {isa sse2 %ISA; ops %(DST(w,VR)), %(grm(w[3],64)) src/i; flags %RMEM;
774 pref %PS; %(VEX_DS(w,1)); extopcode 0f; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
775 %(V(w))CVTS%(SD)2SI_W%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_W%(XNSN(w,SD)).13
776 {isa sse2 %ISA; ops GR32 dst/o, %SRM src/i; flags %RMEM;
777 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x2d) !RRM%(RM)($dst,$src)}
778 %(V(w))CVTS%(SD)2SI_D%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_D%(XNSN(w,SD)).13
779 {isa sse2 %ISA; ops GR64 dst/o, %SRM src/i; flags %RMEM;
780 pref %PS; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x2d) !RRM%(RM)($dst,$src)}
781 %(V(w))CVTTS%(SD)2SI_W%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_W%(XNSN(w,SD)).12
782 {isa sse2 %ISA; ops GR32 dst/o, %SRM src/i; flags %RMEM;
783 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x2c) !RRM%(RM)($dst,$src)}
784 %(V(w))CVTTS%(SD)2SI_D%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_D%(XNSN(w,SD)).12
785 {isa sse2 %ISA; ops GR64 dst/o, %SRM src/i; flags %RMEM;
786 pref %PS; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x2c) !RRM%(RM)($dst,$src)}
787 %(V(w))CVTS%(SD)2S%(DS)_X%(X(w))%(XNSN(w,SD))
788 {isa sse2 %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
789 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(0x5a) !RRM%(RM)($dst,$src)}
792 | if w[3] == "rm" then
794 {isa avx; ops %VR dst/o, %(vmem(SD,128)) src/i; flags %RMEM;
795 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x1a) !RRM%(RM)($dst,$src)}
797 VEXTRACTF128_%(XM(w))YI
798 {isa avx; ops %(vrm(w[3],SD,128)) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
799 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x19) !RRM%(RM)($src,$dst) !I8($imm);
800 fold VEXTRACTF128_M128YI}
801 VINSERTF128_YY%(XM(w))I
802 {isa avx; ops %(DST(w,VR)), %(vrm(w[3],SD,128)) src/i, i8 imm/i; flags %RMEM;
803 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x18) !RRM%(RM)($dst,$src) !I8($imm);
804 fold VINSERTF128_YYM128I}
805 VPERM2F128_YY%(YM(w))I
806 {isa avx; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
807 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x06) !RRM%(RM)($dst,$src) !I8($imm);
808 fold VPERM2F128_YYMI}
810 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
811 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x16) !RRM%(RM)($dst,$src);
816 {isa avx2; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
817 pref 66; %(VEX_D0S(w,1)); extopcode 0f3a; coding !parsub(0x01) !RRM%(RM)($dst,$src) !I8($imm);
822 | for m,u in ipairs(ssec) do
823 %(V(w))%(u[1])PS_%(XN(w))%(X(w))%(XNM(w))
824 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
825 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
826 fold %(V(w))%(u[1])PS_%(XN(w))%(X(w))M}
828 | for m,u in ipairs{{"L","0x12"},{"H","0x16"}} do
829 | local VRXM = rrm(w[3],VR,dupmem(SD,w[2]))
830 %(V(w))MOVS%(u[1])DUP_%(XN(w))%(XNM(w)) //= %(V(w))MOVSLORHDUP_%(XN(w))%(XNM(w)).%(u[2]%16)
831 {isa sse3 %ISA; ops %VR dst/o, %VRXM src/i; flags %RMEM;
832 pref f3; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
833 fold %(V(w))MOVS%(u[1])DUP_%(XN(w))M}
836 | local XD, VRXM = XNMH(w,w[3]=="rm" and w[2]==128), rrm(w[3],VR,dupmem(SD,w[2]))
837 %(V(w))MOVDDUP_%(XN(w))%(XD)
838 {isa sse3 %ISA; ops %VR dst/o, %VRXM src/i; flags %RMEM;
839 pref f2; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
844 | ------------------------------------
845 | mmx = {{"ADDB",0xfc,1}, {"ADDSB",0xec,1}, {"ADDUSB", 0xdc,1},
846 | {"SUBB",0xf8,0}, {"SUBSB",0xe8,0}, {"SUBUSB", 0xd8,0},
847 | {"CMPEQB",0x74,1}, {"CMPGTB",0x64,0},
848 | {"AND",0xdb,1}, {"ANDN",0xdf,0}, {"OR", 0xeb,1}, {"XOR", 0xef,1},
849 | {"ADDW",0xfd,1}, {"ADDSW",0xed,1}, {"ADDUSW", 0xdd,1},
850 | {"SUBW",0xf9,0}, {"SUBSW",0xe9,0}, {"SUBUSW", 0xd9,0},
851 | {"CMPEQW",0x75,1}, {"CMPGTW",0x65,0},
854 | {"CMPEQD",0x76,1}, {"CMPGTD",0x66,0},
855 | {"MULLW",0xd5,1}, {"MULHW",0xe5,1}, {"MADDWD",0xf5,1},
856 | {"UNPCKLBW",0x60,0}, {"UNPCKHBW",0x68,0}, {"UNPCKLWD",0x61,0}, {"UNPCKHWD",0x69,0},
857 | {"UNPCKLDQ",0x62,0}, {"UNPCKHDQ",0x6a,0},
858 | {"ACKUSWB",0x67,0}, {"ACKSSWB",0x63,0}, {"ACKSSDW",0x6b,0},
859 | {"AVGB",0xe0,1,"mmxext"}, {"AVGW",0xe3,1,"mmxext"},
860 | {"MAXUB",0xde,1,"mmxext"}, {"MAXSW",0xee,1,"mmxext"}, {"MINUB",0xda,1,"mmxext"}, {"MINSW",0xea,1,"mmxext"},
861 | {"MULHUW",0xe4,1,"mmxext"}, {"SADBW",0xf6,1,"mmxext"},
862 | {"ADDQ",0xd4,1,"sse2"}, {"SUBQ",0xfb,0,"sse2"}, {"MULUDQ",0xf4,1,"sse2"},
863 | {"HADDW",0x01,0,"sse3","0f38"}, {"HADDD",0x02,0,"sse3","0f38"}, {"HADDSW", 0x03,0,"sse3","0f38"},
864 | {"HSUBW",0x05,0,"sse3","0f38"}, {"HSUBD",0x06,0,"sse3","0f38"}, {"HSUBSW", 0x07,0,"sse3","0f38"},
865 | {"MADDUBSW",0x04,1,"sse3","0f38"},{"MULHRSW",0x0b,1,"sse3","0f38"},{"SHUFB",0x00,0,"sse3","0f38"},
866 | {"ABSB",0x1c,0,"sse3","0f38"},{"ABSW",0x1d,0,"sse3","0f38"},{"ABSD",0x1e,0,"sse3","0f38"},
867 | {"SIGNB",0x08,0,"sse3","0f38"},{"SIGNW",0x09,0,"sse3","0f38"},{"SIGND",0x0a,0,"sse3","0f38"},
868 | {"MAXSB",0x3c,1,"sse41","0f38",true}, {"MAXSD",0x3d,1,"sse41","0f38",true}, {"MAXUW",0x3e,1,"sse41","0f38",true}, {"MAXUD",0x3f,1,"sse41","0f38",true},
869 | {"MINSB",0x38,1,"sse41","0f38",true}, {"MINSD",0x39,1,"sse41","0f38",true}, {"MINUW",0x3a,1,"sse41","0f38",true}, {"MINUD",0x3b,1,"sse41","0f38",true},
870 | {"MULDQ",0x28,1,"sse41","0f38",true}, {"MULLD",0x40,1,"sse41","0f38",true},
871 | {"CMPEQQ",0x29,1,"sse41","0f38",true}, {"CMPGTQ",0x37,0,"sse41","0f38",true},
872 | {"HMINPOSUW",0x41,0,"sse41","0f38",true},{"ACKUSDW",0x2b,0,"sse41","0f38",true},
873 | {"UNPCKLQDQ",0x6c,0,nil,nil,true},{"UNPCKHQDQ",0x6d,0,nil,nil,true},
875 | local mmxsse41b = {
876 | {"SXBW", 0x20, 2}, {"SXBD", 0x21, 4}, {"SXBQ", 0x22, 8}, {"SXWD", 0x23, 2}, {"SXWQ", 0x24, 4}, {"SXDQ", 0x25, 2},
877 | {"ZXBW", 0x30, 2}, {"ZXBD", 0x31, 4}, {"ZXBQ", 0x32, 8}, {"ZXWD", 0x33, 2}, {"ZXWQ", 0x34, 4}, {"ZXDQ", 0x35, 2},
879 | local mmxsll = {{"SLLW",0xf1,0x71,6}, {"SLLD",0xf2,0x72,6}, {"SLLQ",0xf3,0x73,6},
880 | {"SRLW", 0xd1,0x71,2}, {"SRLD", 0xd2,0x72,2}, {"SRLQ",0xd3,0x73,2},
881 | {"SRAW", 0xe1,0x71,4}, {"SRAD", 0xe2,0x72,4},
883 | for l,w in ipairs{{"sse",128,"rr"},{"sse",128,"rm"},
884 | {"avx",128,"rr"},{"avx",128,"rm"},{"avx",256,"rr"},{"avx",256,"rm"}} do
885 | local VR, VM = vreg("I",w[2]), vmem("I",w[2])
886 | -- code with ONLY reg OR memory variants
887 | local RM, ROMI = rrm(w[3],"R","M"), rrm(w[3], "o", "i")
888 | local VRM, SRM = rrm(w[3],VR,VM), rrm(w[3],VR,SM)
889 | local RMEM, WMEM = rrm(w[3],"","rmem"), rrm(w[3],"","wmem")
890 | local COMMUTE = (w[1]=="avx" and "src1" or "dst").."<->src"
891 | local ISA = (w[1]=="avx" and "avx") or "sse2"
892 | for m,u in ipairs{{"A",0x7f,0x6f,"66"},{"U",0x29,0x28,"f3"}} do
893 %(V(w))MOVDQ%(u[1])_%(XN(w))%(XNM(w))
894 {isa %ISA; ops %VR dst/o, %VRM src/i; flags %RMEM;
895 pref %(u[4]); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[3])) !RRM%(RM)($dst, $src);
896 fold %(V(w))MOVDQ%(u[1])_M%(XN(w)) %(V(w))MOVDQ%(u[1])_%(XN(w))M}
897 | if w[3] == "rm" then
898 %(V(w))MOVDQ%(u[1])_%(XNM(w))%(XN(w))
899 {isa %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
900 pref %(u[4]); %(VEX_S0D(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($src,$dst)}
903 | for m,u in ipairs(mmx) do
904 %(V(w))P%(u[1])_%(XN(w))%(X(w))%(XNM(w))
905 {isa %ISA %(u[4] or ""); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
906 pref 66; %(VEX_DS(w)); extopcode %(u[5] or "0f"); coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
907 fold %(V(w))P%(u[1])_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
909 | for m,u in ipairs(mmxsse41b) do
910 | local RMX = RM == "M" and ("M" .. w[2]/u[3]) or RM
911 %(V(w))PMOV%(u[1])_%(XN(w))%(RMX)
912 {isa %ISA sse41; ops %VR dst/o, %(vrm(w[3],"I",w[2]/u[3])) src/i; flags %RMEM;
913 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
914 fold %(V(w))PMOV%(u[1])_%(XN(w))%("M" .. w[2]/u[3])}
916 | for m,u in ipairs(mmxsll) do
917 %(V(w))P%(u[1])_%(XN(w))%(X(w))%(XNM(w))
918 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
919 pref 66; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
920 fold %(V(w))P%(u[1])_%(XN(w))%(X(w))M}
922 | for m,u in ipairs{{"D","66"}, {"LW","f2"}, {"HW","f3"}} do
923 %(V(w))PSHUF%(u[1])_%(XN(w))%(XNM(w))I
924 {isa %ISA; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
925 pref %(u[2]); %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x70) !RRM%(RM)($dst,$src) !I8($imm);
926 fold %(V(w))PSHUF%(u[1])_%(XN(w))MI}
928 %(V(w))PALIGNR_%(XN(w))%(X(w))%(XNM(w))I
929 {isa ssse3 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
930 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x0f)) !RRM%(RM)($dst,$src) !I8($imm);
931 fold %(V(w))PALIGNR_%(XN(w))%(X(w))MI}
932 %(V(w))MPSADBW_%(XN(w))%(X(w))%(XNM(w))I
933 {isa sse41 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
934 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x42) !RRM%(RM)($dst,$src) !I8($imm);
935 fold %(V(w))MPSADBW_%(XN(w))%(X(w))MI}
936 %(V(w))PBLENDW_%(XN(w))%(X(w))%(XNM(w))I
937 {isa sse41 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
938 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x0e) !RRM%(RM)($dst,$src) !I8($imm);
939 fold %(V(w))PBLENDW_%(XN(w))%(X(w))MI}
940 %(V(w))PTEST_%(XN(w))%(XNM(w))
941 {isa sse41 %ISA; ops %VR dst/i, %VRM src/i; flags %RMEM wflags;
942 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x17) !RRM%(RM)($dst,$src);
943 fold %(V(w))PTEST_%(XN(w))M, commute dst<->src}
945 | if w[3] == "rm" then
946 %(V(w))MOVNTDQA_%(XN(w))%(XNM(w))
947 {isa sse41 %ISA; ops %VR dst/o, %VM src/i; flags %RMEM;
948 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
949 %(V(w))MOVNTDQ_%(XNM(w))%(XN(w))
950 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
951 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0xe7) !RRM%(RM)($src,$dst)}
952 %(V(w))MOVNTPD_%(XNM(w))%(XN(w))
953 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
954 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x2b) !RRM%(RM)($src,$dst)}
955 %(V(w))MOVNTPS_%(XNM(w))%(XN(w))
956 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
957 pref ; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x2b) !RRM%(RM)($src,$dst)}
958 %(V(w))LDDQU_%(XN(w))%(XNM(w))
959 {isa sse3 %ISA; ops %VR dst/o, %VM src/i; flags %RMEM;
960 pref f2; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xf0) !RRM%(RM)($dst,$src)}
961 | if w[2] == 128 then
963 {isa %ISA; ops i64* dst/i, %VR src/i; flags %WMEM;
964 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0xd6) !RRM%(RM)($src,$dst)}
965 | else -- w[2] == 256
967 {isa avx2; ops %VR dst/o, %(vmem(w[1],128)) src/i; flags %RMEM;
968 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x5a) !RRM%(RM)($dst,$src)}
970 | if w[1] == "avx" then
971 | local VR0, VM0 = vreg(w[1],w[2]/2), vmem(w[1],w[2]/2)
972 VPGATHERDD_%(XN(w))M%(XN(w))%(XN(w))
973 {isa avx2; ops %VR dst/o, %VM src/i, %VR ix/i, %VR src1/i; flags %RMEM vsib;
974 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x90) !RRM%(RM)VSIB($dst,$src,$ix)}
975 VPGATHERDQ_%(XN(w))M%(XNH(w,true))%(XN(w))
976 {isa avx2; ops %VR dst/o, %VM src/i, %VR0 ix/i, %VR src1/i; flags %RMEM vsib;
977 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x90) !RRM%(RM)VSIB($dst,$src,$ix)}
978 VPGATHERQD_%(XNH(w,true))M%(XN(w))%(XNH(w,true))
979 {isa avx2; ops %VR0 dst/o, %VM src/i, %VR ix/i, %VR0 src1/i; flags %RMEM vsib;
980 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x91) !RRM%(RM)VSIB($dst,$src,$ix)}
981 VPGATHERQQ_%(XN(w))M%(XN(w))%(XN(w))
982 {isa avx2; ops %VR dst/o, %VM src/i, %VR ix/i, %VR src1/i; flags %RMEM vsib;
983 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x91) !RRM%(RM)VSIB($dst,$src,$ix)}
984 VPMASKMOVD_%(XN(w))%(XN(w))M
985 {isa avx2; ops %VR dst/o, %VR src1/i, %VM src/i; flags %RMEM;
986 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x8c) !RRM%(RM)($dst,$src)}
987 VPMASKMOVQ_%(XN(w))%(XN(w))M
988 {isa avx2; ops %VR dst/o, %VR src1/i, %VM src/i; flags %RMEM;
989 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x8c) !RRM%(RM)($dst,$src)}
990 VPMASKMOVD_M%(XN(w))%(XN(w))
991 {isa avx2; ops %VM dst/i, %VR src1/i, %VR src/i; flags %WMEM;
992 pref 66; %(VEX_SD(w)); extopcode 0f38; coding !parsub(0x8e) !RRM%(RM)($src,$dst)}
993 VPMASKMOVQ_M%(XN(w))%(XN(w))
994 {isa avx2; ops %VM dst/i, %VR src1/i, %VR src/i; flags %WMEM;
995 pref 66; %(VEX_SD(w,1)); extopcode 0f38; coding !parsub(0x8e) !RRM%(RM)($src,$dst)}
998 %(V(w))PMOVMSK_W%(XN(w))
999 {isa %ISA; ops GR32 dst/o, %VR src/i;
1000 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xd7) !RRM%(RM)($dst,$src)}
1001 | for m,u in ipairs(mmxsll) do
1002 | --P%(u[1])64_RI { out VRI64 dst : in VRI64 dst,i8 imm } { parm (xx(w[3])) : sub %(w[4]) : rexrr 0 0 dst : code $parm !RRMR($sub,$dst) !I8($imm) : pref 0x0f } {mmx}
1003 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1004 {isa %ISA; ops %(DST(w,VR)), i8 imm/i;
1005 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(%(u[3])) !RRM%(RM)(!sub(%(u[4])),$dst) !I8($imm)}
1007 | for m,u in ipairs{{"SLLDQ",7}, {"SRLDQ",3}} do
1008 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1009 {isa %ISA; ops %(DST(w,VR)), i8 imm/i;
1010 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(0x73) !RRM%(RM)(!sub(%(u[2])),$dst) !I8($imm)}
1012 | if w[2] == 128 then
1014 {isa %ISA; ops GR32 dst/o, %VR src/i, i8 imm/i;
1015 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !par(0xc5) !RRM%(RM)($dst,$src) !I8($imm)}
1016 %(V(w))MOVQ_%(XN(w))%(RNMN(w[3],64))
1017 {isa %ISA x64; ops %VR dst/o, %(grm(w[3],64)) src/i; flags %RMEM;
1018 pref 66; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x6e) !RRM%(RM)($dst,$src)}
1019 %(V(w))MOVQ_%(RNMN(w[3],64))%(XN(w))
1020 {isa %ISA x64; ops %(grm(w[3],64)) dst/%(ROMI), %VR src/i; flags %WMEM;
1021 pref 66; %(VEX_S0D(w,1)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($src,$dst)}
1024 | if w[1] == "sse" then
1025 PBLENDVB_%(XN(w))%(X(w))%(XNM(w))_XMM0
1026 {isa sse41; ops %VR dst/io, %VRM src/i, %VR{xmm0} msk/i; flags %RMEM;
1027 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x10) !RRM%(RM)($dst,$src);
1028 fold PBLENDVB_%(XN(w))%(X(w))M_XMM0}
1029 MASKMOVDQU_%(XN(w))%(X(w))%(XNM(w))_XMM0
1030 {isa sse41; ops %VR dst/io, %VRM src/i, %VR{xmm0} msk/i; flags %RMEM;
1031 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x10) !RRM%(RM)($dst,$src)}
1033 | for m,u in ipairs{{"B",0x78,8}, {"W",0x79,16}, {"D",0x58,32}, {"Q",0x59,64}} do
1034 VPBROADCAST%(u[1])_%(XN(w))%(XMN(w,u[3]))
1035 {isa avx2; ops %VR dst/o, %(vrm(w[3],"I",u[3])) src/i; flags %RMEM;
1036 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1037 fold VPBROADCAST%(u[1])_%(XN(w))%(MN(u[3]))}
1039 | for m,u in ipairs{{"SLL",0x47}, {"SRA",0x46}, {"SRL",0x45}} do
1040 VP%(u[1])VD_%(XN(w))%(X(w))%(XNM(w))
1041 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1042 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1043 fold VP%(u[1])VD_%(XN(w))%(X(w))M}
1044 VP%(u[1])VQ_%(XN(w))%(X(w))%(XNM(w))
1045 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1046 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1047 fold VP%(u[1])VQ_%(XN(w))%(X(w))M}
1049 VPBLENDD_%(XN(w))%(X(w))%(XNM(w))I
1050 {isa avx2; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
1051 pref 66; %(VEX_DS(w,1)); extopcode 0f3a; coding !parsub(0x02) !RRM%(RM)($dst,$src) !I8($imm);
1052 fold VPBLENDD_%(XN(w))%(X(w))MI}
1053 VPBLENDVB_%(XN(w))%(X(w))%(XNM(w))%(XN(w))
1054 {isa avx2; ops %(DST(w,VR)), %VRM src/i, %VR msk/i; flags %RMEM;
1055 pref 66; %(VEX_DS(w,1)); extopcode 0f3a; coding !parsub(0x4c) !RRM%(RM)($dst,$src) !I8($msk<<4);
1056 fold VPBLENDVB_%(XN(w))%(X(w))M%(XN(w))}
| if w[2] == 128 then
%(V(w))MOVD_%(XN(w))%(RNMN(w[3],32))
{isa %ISA; ops %VR dst/o, %(grm(w[3],32)) src/i; flags %RMEM;
pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x6e) !RRM%(RM)($dst,$src)}
%(V(w))MOVD_%(RNMN(w[3],32))%(XN(w))
{isa %ISA; ops %(grm(w[3],32)) dst/%(ROMI), %VR src/i; flags %WMEM;
pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($src,$dst)}
%(V(w))MOVQ_%(XN(w))%(XNM(w))
{isa %ISA; ops %VR dst/o, %(rrm(w[3],vreg(w[1],64),vmem(w[1],64))) src/i; flags %RMEM;
pref f3; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($dst,$src)}
%(V(w))PINSRW_%(XN(w))%(X(w))%(rrm(w[3],RN(32),MN(16)))I
{isa %ISA; ops %(DST(w,VR)), %(rrm(w[3],greg(32),gmem(16))) src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f; coding !parsub(0xc4) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PINSRW_%(XN(w))%(X(w))%(MN(16))I}
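| -- PINSRW's operands are deliberately asymmetric: the register form reads the low word
| -- of a 32-bit GPR (RN(32)) while the memory form fetches exactly 16 bits (MN(16)).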
| for m,u in ipairs{{"B",8,"0x14",32,"0x20"}, {"WX",16,"0x15",32}, {"D",32,"0x16",32,"0x22"}, {"Q",64,"0x16",64,"0x22"}} do
%(V(w))PEXTR%(u[1])128_%(rrm(w[3],RN(u[4]),"M"..u[2]))%(XN(w))I
{isa %ISA sse41 %(u[4]==64 and "x64" or ""); ops %(rrm(w[3],greg(u[4]),gmem(u[2]))) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
pref 66; %(VEX_S0D(w,r64bit(u[4]))); extopcode 0f3a; coding !parsub(%(u[3])) !RRM%(RM)($src,$dst) !I8($imm);
fold %(V(w))PEXTR%(u[1])128_%("M"..u[2])%(XN(w))I}
| if u[2] ~= 16 then
%(V(w))PINSR%(u[1])_%(XN(w))%(X(w))%(rrm(w[3],RN(u[4]),"M"..u[2]))I
{isa %ISA sse41 %(u[4]==64 and "x64" or ""); ops %(DST(w,VR)), %(rrm(w[3],greg(u[4]),gmem(u[2]))) src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_DS(w,r64bit(u[4]))); extopcode 0f3a; coding !parsub(%(u[5])) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PINSR%(u[1])_%(XN(w))%(X(w))%("M"..u[2])I}
| end
| end
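| -- The u[2] ~= 16 guard exists because PINSRW already has its legacy 0f c4 encoding above;
| -- only PEXTRW gains the extra 0f3a 15 form (which, unlike 0f c5, can store to m16). The
| -- Q row needs REX.W/VEX.W=1 via r64bit and is therefore gated to x64.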
%(V(w))MASKMOVDQU_RDI_%(XN(w))%(XNM(w))
{isa %ISA sse2; ops GR32{edi} tgt/i, %VR dst/i, %VRM src/i; flags wmem %RMEM;
pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xf7) !RRM%(RM)($dst,$src)}
%(V(w))PCMPESTRI_ECX_%(XN(w))_EAX_%(XNM(w))_EDX_I
{isa %ISA sse42; ops GR32{ecx} ix/o, %VR dst/i, GR32{eax} dstlen/i, %VRM src/i, GR32{edx} srclen/i, i8 imm/i; flags %RMEM wflags;
pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x61) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PCMPESTRI_ECX_%(XN(w))_EAX_M_EDX_I}
%(V(w))PCMPESTRM_XMM0_%(XN(w))_EAX_%(XNM(w))_EDX_I
{isa %ISA sse42; ops %VR{xmm0} ix/o, %VR dst/i, GR32{eax} dstlen/i, %VRM src/i, GR32{edx} srclen/i, i8 imm/i; flags %RMEM wflags;
pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x60) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PCMPESTRM_XMM0_%(XN(w))_EAX_M_EDX_I}
%(V(w))PCMPISTRI_ECX_%(XN(w))%(XNM(w))I
{isa %ISA sse42; ops GR32{ecx} ix/o, %VR dst/i, %VRM src/i, i8 imm/i; flags %RMEM wflags;
pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x63) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PCMPISTRI_ECX_%(XN(w))MI}
%(V(w))PCMPISTRM_XMM0_%(XN(w))%(XNM(w))I
{isa %ISA sse42; ops %VR{xmm0} ix/o, %VR dst/i, %VRM src/i, i8 imm/i; flags %RMEM wflags;
pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x62) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PCMPISTRM_XMM0_%(XN(w))MI}
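| -- The four PCMPxSTRx entries differ only in the low opcode bits: the E variants (60/61)
| -- take explicit lengths in EAX/EDX, the I variants (62/63) stop at a NUL element; the
| -- *M variants return a mask in XMM0, the *I variants an index in ECX, which is why
| -- those registers are pinned in the operand lists.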
%(V(w))AESDEC_%(XN(w))%(X(w))%(XNM(w))
{isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xde) !RRM%(RM)($dst,$src);
fold %(V(w))AESDEC_%(XN(w))%(X(w))M}
%(V(w))AESDECLAST_%(XN(w))%(X(w))%(XNM(w))
{isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdf) !RRM%(RM)($dst,$src);
fold %(V(w))AESDECLAST_%(XN(w))%(X(w))M}
%(V(w))AESENC_%(XN(w))%(X(w))%(XNM(w))
{isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdc) !RRM%(RM)($dst,$src);
fold %(V(w))AESENC_%(XN(w))%(X(w))M}
%(V(w))AESENCLAST_%(XN(w))%(X(w))%(XNM(w))
{isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdd) !RRM%(RM)($dst,$src);
fold %(V(w))AESENCLAST_%(XN(w))%(X(w))M}
%(V(w))AESIMC_%(XN(w))%(XNM(w))
{isa aes %(w[1]); ops %VR dst/o, %VRM src/i; flags %RMEM;
pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0xdb) !RRM%(RM)($dst,$src);
fold %(V(w))AESIMC_%(XN(w))M}
%(V(w))AESKEYGENASSIST_%(XN(w))%(XNM(w))I
{isa aes %(w[1]); ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0xdf) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))AESKEYGENASSIST_%(XN(w))MI}
%(V(w))PCLMULQDQ_%(XN(w))%(X(w))%(XNM(w))I
{isa clmul %(w[1]); ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x44) !RRM%(RM)($dst,$src) !I8($imm);
fold %(V(w))PCLMULQDQ_%(XN(w))%(X(w))MI}
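| -- PCLMULQDQ's imm8 steers the carry-less multiply: bit 0 selects which quadword of the
| -- first source enters the product, bit 4 selects the quadword of the second source.
| -- AESIMC and AESKEYGENASSIST take no src1, so they use the D0S form (vvvv unused),
| -- matching the other two-operand entries above.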
| else -- w[2] == 256
VPERMD_YY%(YM(w))
{isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x36) !RRM%(RM)($dst,$src);
fold VPERMD_YYM}
VPERMQ_Y%(YM(w))I
{isa avx2; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_D0S(w,1)); extopcode 0f3a; coding !parsub(0x00) !RRM%(RM)($dst,$src) !I8($imm);
fold VPERMQ_YMI}
VPERM2I128_YY%(YM(w))I
{isa avx2; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x46) !RRM%(RM)($dst,$src) !I8($imm);
fold VPERM2I128_YYMI}
VEXTRACTI128_%(XM(w))YI
{isa avx2; ops %(rrm(w[3],vreg("I",128),vmem("I",128))) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x39) !RRM%(RM)($src,$dst) !I8($imm);
fold VEXTRACTI128_M128YI}
VINSERTI128_YY%(XM(w))I
{isa avx2; ops %(DST(w,VR)), %(rrm(w[3],vreg("I",128),vmem("I",128))) src/i, i8 imm/i; flags %RMEM;
pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x38) !RRM%(RM)($dst,$src) !I8($imm);
fold VINSERTI128_YYM128I}
| end
| end
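| -- VPERM2I128's imm8 picks a source 128-bit lane for each destination half (bits 1:0 and
| -- 5:4) and can zero a half via bits 3 and 7; VEXTRACTI128/VINSERTI128 use only imm8
| -- bit 0 as the lane index.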
VZEROALL {isa avx; vex rr 0 0 0 0 1; extopcode 0f; coding !parsub(0x77)}
VZEROUPPER {isa avx; vex rr 0 0 0 0 0; extopcode 0f; coding !parsub(0x77)}
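| -- Both share the bare VEX 0f 77 opcode; presumably the trailing field of the vex spec
| -- above is VEX.L, the only bit distinguishing VZEROALL (L=1) from VZEROUPPER (L=0).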
| --MOVDV64_32_RR { out VRI64 dst : in GR32 src } {parm 0x6e : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmx}
| --MOVDV64_32_RM { out VRI64 dst : in i32* src : rmem } {parm 0x6e : rexrm 0 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {mmx}
| --MOVD32_V64_MR { in i32* dst, VRI64 src : wmem } {parm 0x7e : rexrm 0 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {mmx}
| --MOVD32_V64_RR { out GR32 dst : in VRI64 src } {parm 0x7e : rexrr 0 src dst : code $parm !RRMR($src,$dst) : pref 0x0f} {mmx}

| --MOVQV64_64_RR { out VRI64 dst : in GR64 src } {parm 0x6e : rexrr 1 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {x64}
| --MOVQV64_64_RM { out VRI64 dst : in i64* src : rmem } {parm 0x6e : rexrm 1 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {x64}
| --MOVQ64_V64_MR { in i64* dst, VRI64 src : wmem } {parm 0x7e : rexrm 1 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {x64}
| --MOVQ64_V64_RR { out GR64 dst : in VRI64 src } {parm 0x7e : rexrr 1 src dst : code $parm !RRMR($src,$dst) : pref 0x0f} {x64}

| --MOVQV64_MR { in VI64* dst, VRI64 src : wmem } {parm 0x7f : rexrm 0 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {mmx}
| --MOVQV64_RR { out VRI64 dst : in VRI64 src } {parm 0x6f : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmx}
| --MOVQV64_RM { out VRI64 dst : in VI64* src : rmem } {parm 0x6f : rexrm 0 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {mmx}
| -- ----------------------------------

| --MOVQ2DQ_RR { out VRI128 dst : in VRI64 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf3} {sse2}
| --MOVDQ2Q_RR { out VRI64 dst : in VRI128 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf2} {sse2}
| --MOVQV128_V64_RR { out VRI128 dst : in VRI64 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf3} {sse2}
| --MOVQV64_V128_RR { out VRI64 dst : in VRI128 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf2} {sse2}
| -- ----------------------------------
| --PMOVMSK64_RR { out GR32 dst : in VRI64 src } {parm 0xd7 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmxext}
| --PSHUFW64_RRI { out VRI64 dst : in VRI64 src,i8 imm } { parm 0x70 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
| --PSHUFW64_RMI { out VRI64 dst : in VI64* src,i8 imm : rmem } { parm 0x70 : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
LDMXCSR_M32 //= LDMXCSR.2
{isa sse; ops i32* dst/i; flags rmem; rex rm 0 0 dst; extopcode 0f; coding !par(0xae) !RRMM(!sub(2),$dst)}
STMXCSR_M32 //= STMXCSR.3
{isa sse; ops i32* dst/i; flags wmem; rex rm 0 0 dst; extopcode 0f; coding !par(0xae) !RRMM(!sub(3),$dst)}
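| -- LDMXCSR and STMXCSR share opcode 0f ae and differ only in the /2 vs /3 subfield;
| -- they load/store the 32-bit MXCSR control/status register, matching rmem vs wmem.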
| --PINSRW64_RRI { out VRI64 dst : in VRI64 dst, GR32 src,i8 imm } { parm 0xc4 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
| --PINSRW64_RMI { out VRI64 dst : in VRI64 dst, i16* src,i8 imm : rmem } { parm 0xc4 : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f } {mmxext}

| --PEXTRW64_RRI { out GR32 dst : in VRI64 src,i8 imm } { parm 0xc5 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
| -- there exist 64bit versions of pinsrw & pextrw, but they don't make sense, do they?

| --PALIGNR64_RR { out VRI64 dst : in VRI64 dst,VRI64 src,i8 imm } { parm 0x0f : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f3a } {ssse3}
| --PALIGNR64_RM { out VRI64 dst : in VRI64 dst,VI64* src,i8 imm : rmem } { parm 0x0f : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f3a } {ssse3}