1 #/bin/emblua LUAPREFIX=|
5 | -----------------------------------
6 | function imm(bits) return "i"..(bits <= 32 and bits or 32) end -- immediate operand type name ("i8".."i32"); widths above 32 are capped to "i32"
7 | function immv(bits) return imm(bits)..(bits > 8 and "_8" or "") end -- imm() plus an "_8" suffix when bits > 8 (short-form variant marker)
8 | function IMM(bits) return "!I"..(bits <= 32 and bits or 32) end -- coding token for the immediate ("!I8".."!I32"), capped at 32 bits
9 | function IMMV(bits) return IMM(bits)..(bits > 8 and "_8" or "") end -- IMM() plus an "_8" suffix when bits > 8
10 | function pref16(bits) return bits == 16 and "66" or "" end -- operand-size prefix byte: "66" only for 16-bit operands
11 | function isa64(bits,x,y) return bits > 32 and (y or "x64") or x or "" end -- isa tag by width: bits > 32 -> y (default "x64"), else x (default "")
12 | function greg(bits) return "GR"..bits end -- general-register class name, e.g. greg(32) == "GR32"
13 | function gmem(bits) return "i"..bits.."*" end -- memory (pointer) operand type, e.g. gmem(32) == "i32*"
14 | function r64bit(bits) return bits == 64 and 1 or 0 end -- 1 only for 64-bit width; used as the %W field of rex/vex lines below
15 | function r16bit(bits) return bits > 8 and 1 or 0 end -- 1 when bits > 8; added to 8-bit base opcodes (R16) in the codings below
16 | function rrm(rm,r,m) return rm == "rr" and r or m end -- pick r for register-register ("rr") forms, m for register-memory forms
17 | function grm(rm,n) return rrm(rm,greg(n),gmem(n)) end -- register class or memory type of width n, per rm mode
18 | ccs = {"O", "NO", "B", "AE", "E", "NE", "BE", "A", "S", "NS", "PE", "PO", "L", "GE", "LE", "G"} -- x86 condition-code mnemonics; index-1 == cc encoding used in Jcc/SETcc/CMOVcc
19 | ccsflags = {"rflag_o", "rflag_o", "rflag_c", "rflag_c", "rflag_z", "rflag_z", "rflag_c rflag_z", "rflag_c rflag_z", -- flags each cc reads, parallel to ccs
20 | "rflag_s", "rflag_s", "rflag_p", "rflag_p", "rflag_s rflag_o", "rflag_s rflag_o", "rflag_s rflag_o rflag_z", "rflag_s rflag_o rflag_z"} -- fixed: "L" (SF!=OF) was listed as "rflag_o" only, missing rflag_s; it must match its pair "GE"
21 | --// local _ccalias={[2]="c",[2]="nae",[3]="nb",[3]="nc",[4]="z",[5]="nz",[6]="na",[7]="nbe",
22 | --// [10]="p",[11]="np",[12]="nge",[13]="nl",[14]="ng",[15]="nle"}
23 | local _RN = {[8] = "B", [16] = "H", [32] = "W", [64] = "D"} -- mnemonic width letter per operand size
24 | local _EAX = {[8] = "AL", [16] = "AX", [32] = "EAX", [64] = "RAX"} -- accumulator register name per width
25 | local _EDX = {[8] = "DL", [16] = "DX", [32] = "EDX", [64] = "RDX"} -- data register name per width
26 | local RN = function(N) return _RN[N] end -- width letter B/H/W/D for mnemonic suffixes
27 | local MN = function(N) return "M"..N end -- sized memory suffix, e.g. MN(32) == "M32"
28 | local RNM = function(rm, N) return rrm(rm, RN(N), "M") end -- width letter for rr forms, plain "M" for rm forms
29 | local RNMN = function(rm, N) return rrm(rm, RN(N), MN(N)) end -- width letter for rr forms, sized "M<N>" for rm forms
30 | local EAX = function(N) return _EAX[N] or "??" end -- accumulator name; "??" for widths not in _EAX
31 | local EDX = function(N) return _EDX[N] or "??" end -- data register name; "??" for widths not in _EDX
33 | for rm_,rm in ipairs{"rr","rm"} do
34 | local RM, ROMI, RIOMI = rrm(rm,"R","M"), rrm(rm, "o", "i"), rrm(rm, "io", "i")
35 | local RMEM, WMEM, RWMEM = rrm(rm,"","rmem"), rrm(rm,"","wmem"), rrm(rm,"","rwmem")
36 | for k,v in ipairs{8,16,32,64} do
37 | local M,R,I, R16,W = gmem(v),greg(v),imm(v),r16bit(v),r64bit(v)
38 | local RRM = rrm(rm,R,M)
39 | local ISA, P = isa64(v), pref16(v)
40 MOV_%(RNM(rm,v))%(RN(v))
41 {isa %ISA; ops %RRM dst/%(ROMI), %R src/i; flags %WMEM;
42 pref %P; rex %rm %W src dst; coding !par(%(0x88+R16)) !RRM%(RM)($src,$dst);
43 fold MOV_M%(RN(v)) MOV_%(RN(v))M}
44 | for l,w in ipairs{{"ADD",0, 1}, {"OR",1, 1}, {"AND",4, 1}, {"SUB",5, 0}, {"XOR",6, 1},
45 | {"ADC",2, 1, 1}, {"SBB",3, 0, 1}} do
46 %(w[1])_%(RNMN(rm,v))I //= ARITH%(v)_%(RM)I.%(w[2])
47 {isa %ISA; ops %RRM dst/%(RIOMI), %I imm/i; flags wflags %RWMEM %(w[4] and "rflag_c" or "");
48 pref %P; rex %rm %W 0 dst; coding !par(%(0x80+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) %(IMMV(v))($imm);
49 fold %(w[1])_%(MN(v))I}
50 %(w[1])_%(RNM(rm,v))%(RN(v)) //: ARITH_%(RM)%(RN(v))
51 {isa %ISA; ops %RRM dst/%(RIOMI), %R src/i; flags wflags %RWMEM %(w[4] and "rflag_c" or "");
52 pref %P; rex %rm %W src dst; coding !sub(%(w[2]))*8+!par(%(0x00+R16)) !RRM%(RM)($src,$dst);
53 fold %(w[1])_M%(RN(v)) %(w[1])_%(RN(v))M; commute %(w[3] > 0 and "dst<->src" or "")}
55 %(w[1])_%(RN(v))%(RNM(rm,v)) //= ARITH%(v)_R%(RM).%(w[2])
56 {isa %ISA; ops %R dst/io, %RRM src/i; flags wflags %RMEM %(w[4] and "rflag_c" or "");
57 pref %P; rex %rm %W dst src; coding !sub(%(w[2]))*8+!par(%(0x02+R16)) !RRM%(RM)($dst,$src);
58 fold %(w[1])_M%(RN(v)) %(w[1])_%(RN(v))M; commute %(w[3] > 0 and "dst<->src" or "")}
60 %(w[1])_%(EAX(v))_I //: ARITH_%(EAX(v))_I
61 {isa %ISA; ops %R {%(EAX(v))} dst/io, %I imm/i; flags wflags %(w[4] and "rflag_c" or "");
62 pref %P; rex rr %W 0 0; coding !sub(%(w[2]))*8+!par(%(0x04+R16)) %(IMMV(v))($imm)}
66 {isa %ISA; ops %RRM dst/i, %I imm/i; flags wflags %RMEM;
67 pref %P; rex %rm %W 0 dst; coding !par(%(0x80+R16)) !RRM%(RM)(!sub(7),$dst) %(IMMV(v))($imm);
69 CMP_%(RNM(rm,v))%(RN(v))
70 {isa %ISA; ops %RRM dst/i, %R src/i; flags wflags %RMEM;
71 pref %P; rex %rm %W src dst; coding !sub(7)*8+!par(%(0x00+R16)) !RRM%(RM)($src,$dst);
72 fold CMP_M%(RN(v)) CMP_%(RN(v))M}
74 {isa %ISA; ops %RRM dst/i, %I imm/i; flags wflags %RMEM;
75 pref %P; rex %rm %W 0 dst; coding !par(%(0xf6+R16)) !RRM%(RM)(!sub(0),$dst) %(IMM(v))($imm);
77 TEST_%(RNM(rm,v))%(RN(v))
78 {isa %ISA; ops %RRM dst/i, %R src/i; flags wflags %RMEM;
79 pref %P; rex %rm %W src dst; coding !par(%(0x84+R16)) !RRM%(RM)($src,$dst);
80 fold TEST_%(RN(v))M TEST_M%(RN(v)); commute dst<->src}
81 | for l,w in ipairs{{"INC",0,0xfe,"wflag_c"},{"DEC",1,0xfe,"wflag_c"},{"NOT",2,0xf6,"wflags"},{"NEG",3,0xf6,"wflags"}} do
82 | local suffix = (w[1] == "INC" or w[1] == "DEC") and (v == 16 or v == 32) and rm == "rr" and "_x64" or ""
83 | -- DISABLED INC/DEC_H/W_x64 in 32bit mode, because we can't generate it via nasm
84 %(w[1])_%(RNMN(rm,v))%(suffix) //= UNARY%(v)_%(RM).%(w[2])
85 {isa %ISA %(#suffix > 0 and "x64" or ""); ops %RRM dst/%(RIOMI); flags %(w[4]) %RWMEM;
86 pref %P; rex %rm %W 0 dst; coding !par(%(w[3]+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
87 fold %(w[1])_%(MN(v))}
89 | for l,w in ipairs{{"ROL",0},{"ROR",1},{"SHL",4},{"SHR",5},{"SAR",7}} do
90 %(w[1])_%(RNMN(rm,v))I //= SHIFT%(v)_%(RM)I.%(w[2])
91 {isa %ISA; ops %RRM dst/%(RIOMI), i8 sa/i; flags wflags %RWMEM;
92 pref %P; rex %rm %W 0 dst; coding !par(%(0xc0+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) !I8_1($sa);
93 fold %(w[1])_%(MN(v))I}
94 %(w[1])_%(RNMN(rm,v))_CL //= SHIFT%(v)_%(RM)_CL.%(w[2])
95 {isa %ISA; ops %RRM dst/%(RIOMI), GR8{cl} sa/i; flags wflags %RWMEM;
96 pref %P; rex %rm %W 0 dst; coding !par(%(0xd2+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
97 fold %(w[1])_%(MN(v))_CL}
99 | for l,w in ipairs{{"RCL",2},{"RCR",3}} do
100 %(w[1])_%(RNMN(rm,v))I //= SHIFTC%(v)_%(RM)I.%(w[2])
101 {isa %ISA; ops %RRM dst/%(RIOMI), i8 sa/i; flags wflags %RWMEM rflag_c;
102 pref %P; rex %rm %W 0 dst; coding !par(%(0xc0+R16)) !RRM%(RM)(!sub(%(w[2])),$dst) !I8_1($sa);
103 fold %(w[1])_%(MN(v))I}
104 %(w[1])_%(RNMN(rm,v))_CL //= SHIFTC%(v)_%(RM)_CL.%(w[2])
105 {isa %ISA; ops %RRM dst/%(RIOMI), GR8{cl} sa/i; flags wflags %RWMEM rflag_c;
106 pref %P; rex %rm %W 0 dst; coding !par(%(0xd2+R16)) !RRM%(RM)(!sub(%(w[2])),$dst);
107 fold %(w[1])_%(MN(v))_CL}
110 {isa %ISA sse42; ops GR32 dst/io, %RRM src/i; flags %RMEM;
111 pref %(P)f2; rex %rm %W dst src; extopcode 0f38; coding !parsub(%(0xf0+R16)) !RRM%(RM)($dst,$src);
112 fold CRC32_W%(MN(v))}
113 CMPXCHG_%(RNM(rm,v))%(RN(v))_%(EAX(v))
114 {isa %ISA; ops %RRM dst/%(rrm(rm,"io","i")), %R src/i, %R{%(EAX(v))} cmp/io; flags %RMEM wflags;
115 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(0xb0+R16)) !RRM%(RM)($src,$dst);
116 fold CMPXCHG_M%(RN(v))_%(EAX(v))}
117 XADD_%(RNM(rm,v))%(RN(v))
118 {isa %ISA; ops %RRM src/%(RIOMI), %R dst/io; flags %RWMEM;
119 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xc0+R16)) !RRM%(RM)($dst,$src);
122 | local suf = v == 64 and "32S" or ""
124 {isa %ISA; ops %M dst/i, %I imm/i; flags wmem;
125 pref %P; rex rm %W 0 dst; coding !par(%(0xc6+R16)) !RRMM(!sub(0),$dst) %(IMM(v))($imm)}
127 {isa %ISA; ops %R dst/o, %M src/i; flags rmem;
128 pref %P; rex rm %W dst src; coding !par(%(0x8a+R16)) !RRMM($dst,$src)}
130 {isa %ISA; ops %R dst/i, %M src/i; flags wflags rmem;
131 pref %P; rex rm %W dst src; coding !sub(7)*8+!par(%(0x02+R16)) !RRM%(RM)($dst,$src)}
133 {isa %ISA; ops %R dst/i, %M src/i; flags wflags rmem;
134 pref %P; rex rm %W dst src; coding !par(%(0x84+R16)) !RRMM($dst,$src)}
136 {isa %ISA; ops %M src/i, %R dst/io; flags %RWMEM;
137 pref %P; rex %rm %W dst src; coding !parsub(%(0x86+R16)) !RRM%(RM)($dst,$src)}
140 {isa %ISA; ops %R dst/o, i%v imm/i;
141 pref %P; rex rr %W 0 dst; coding !par(%(0xb0+R16*8))+($dst&7) !I%v($imm);
142 fold %(v < 64 and ("MOV_M"..v.."I"))}
144 {isa %ISA; ops %R {%(EAX(v))} dst/i, %I imm/i; flags wflags;
145 pref %P; rex rr %W 0 0; coding !sub(7)*8+!par(%(0x04+R16)) %(IMMV(v))($imm)}
146 XCHG_%(RN(v))%(RN(v))
147 {isa %ISA; ops %R src/io, %R dst/io; flags;
148 pref %P; rex %rm %W src dst; coding !parsub(%(0x86+R16)) !RRM%(RM)($src,$dst);
149 fold XCHG_M%(RN(v)); commute dst<->src}
154 | for l,w in ipairs{{"SHLD",0xa4},{"SHRD",0xac}} do
155 %(w[1])_%(RNM(rm,v))%(RN(v))I //= SHD_%(RNM(rm,v))%(RN(v))I.%(w[2]%16)}
156 {isa %ISA; ops %RRM dst/%(RIOMI), %R src/i, i8 sa/i; flags wflags %RWMEM;
157 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2])) !RRM%(RM)($src,$dst) !I8($sa);
158 fold %(w[1])_M%(RN(v))I}
159 %(w[1])_%(RNM(rm,v))%(RN(v))_CL //= SHD%(v)_%(RNM(rm,v))%(RN(v))_CL.%(w[2]%16)
160 {isa %ISA; ops %RRM dst/%(RIOMI), %R src/i, GR8{cl} sa/i; flags wflags %RWMEM;
161 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2]+1)) !RRM%(RM)($src,$dst);
162 fold %(w[1])_M%(RN(v))_CL}
164 | local RAX, RDX = EAX(v), EDX(v)
165 | for l,w in ipairs{{"",6},{"I",7}} do
166 %(w[1])DIV_%(RNMN(rm,v)) //= DIVREM_%(RNMN(rm,v)).%(w[2])
167 {isa %ISA; ops %R{%RAX} res/io, %R{%RDX} rem/io, %RRM src/i; flags wflags %RMEM;
168 pref %P; rex %rm %W 0 src; coding !par(0xf7) !RRM%(RM)(!sub(%(w[2])),$src);
169 fold %(w[1])DIV_%(MN(v))}
170 %(w[1])MUL_%(RNMN(rm,v)) //= XMUL_%(RNMN(rm,v)).%(w[2]-2))
171 {isa %ISA; ops %R{%RAX} lo/io, %R{%RDX} hi/o, %RRM src/i; flags wflags %RMEM;
172 pref %P; rex %rm %W 0 src; coding !par(0xf7) !RRM%(RM)(!sub(%(w[2]-2)),$src);
173 fold %(w[1])MUL_%(MN(v)); commute lo<->src}
175 IMUL_%(RN(v))%(RNM(rm,v))
176 {isa %ISA; ops %R dst/io, %RRM src/i; flags wflags %RMEM;
177 pref %P; rex %rm %W dst src; extopcode 0f; coding !par(0xaf) !RRM%(RM)($dst,$src);
178 fold IMUL_%(RN(v))RM; commute dst<->src}
179 IMUL_%(RN(v))%(RNM(rm,v))I
180 {isa %ISA; ops %R dst/o, %RRM src/i, %I imm/i; flags wflags %RMEM;
181 pref %P; rex %rm %W dst src; coding !par(0x69) !RRM%(RM)($dst,$src) %(IMMV(v))($imm);
182 fold IMUL_%(RN(v))MI}
183 BSF_%(RN(v))%(RNM(rm,v)) //= BS%(v)_%(RN(v))%(RNM(rm,v)).12
184 {isa %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
185 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbc) !RRM%(RM)($dst,$src);
187 BSR_%(RN(v))%(RNM(rm,v)) //= BS%(v)_%(RN(v))%(RNM(rm,v)).13
188 {isa %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
189 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbd) !RRM%(RM)($dst,$src);
191 BT_%(RNM(rm,v))%(RN(v))
192 {isa %ISA; ops %RRM dst/i, %R src/i; flags wflags %RMEM;
193 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(0xa3) !RRM%(RM)($src,$dst)}
195 {isa %ISA; ops %RRM dst/i, i8 imm/i; flags wflags %RMEM;
196 pref %P; rex %rm %W 0 dst; extopcode 0f; coding !par(0xba) !RRM%(RM)(!sub(4),$dst) !I8($imm)}
197 | for l,w in ipairs{{"S",0xab,5},{"R",0xb3,6},{"C",0xbb,7}} do
198 BT%(w[1])_%(RNM(rm,v))%(RN(v)) //= BTX_%(RNM(rm,v))%(RN(v)).11
199 {isa %ISA; ops %RRM dst/%(RIOMI), %R src/i; flags wflags %RWMEM;
200 pref %P; rex %rm %W src dst; extopcode 0f; coding !parsub(%(w[2])) !RRM%(RM)($src,$dst)}
201 BT%(w[1])_%(RNMN(rm,v))I //= BTX_%(RNMN(rm,v))I.5 {
202 {isa %ISA; ops %RRM dst/%(RIOMI), i8 imm/i; flags wflags %RWMEM;
203 pref %P; rex %rm %W 0 dst; extopcode 0f; coding !par(0xba) !RRM%(RM)(!sub(%(w[3])),$dst) !I8($imm)}
205 | for l,w in ipairs(ccs) do
206 CMOV%(w)_%(RN(v))%(RNM(rm,v)) //= CMOV_%(RN(v))%(RNM(rm,v)).%(l-1)
207 {isa cmov %ISA; ops %R dst/io, %RRM src/i; flags subflags %RMEM;
208 pref %P; rex %rm %W dst src; extopcode 0f; coding !parsub(%(0x40+l-1)) !RRM%(RM)($dst,$src);
209 fold CMOV%(w)_%(RN(v))M}
213 {isa movbe %ISA; ops %R dst/o, %RRM src/i; flags rmem;
214 pref %P; rex rm %W dst src; extopcode 0f38; coding !parsub(0xf0) !RRMM($dst,$src)}
216 {isa movbe %ISA; ops %RRM dst/i, %R src/i; flags wmem;
217 pref %P; rex rm %W src dst; extopcode 0f38; coding !parsub(0xf1) !RRMM($src,$dst)}
219 XCHG_%(EAX(v))_%(RN(v))
220 {isa %ISA; ops %R dst/io, %R{%(EAX(v))} src/io;
221 pref %P; rex rr %W 0 dst; coding !parsub(0x90)+($dst&7)}
227 POPCNT_%(RN(v))%(RNM(rm,v))
228 {isa popcnt %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
229 pref f3; rex %rm %W dst src; extopcode 0f; coding !par(0xb8) !RRM%(RM)($dst,$src);
230 fold POPCNT_%(RN(v))M}
231 | for l,w in ipairs{8,16} do
232 MOVZX_%(RN(v))%(RNMN(rm,w)) //= MOVX_%(RN(v))%(RNMN(rm,w)).6
233 {isa %ISA; ops %R dst/o, %(grm(rm,w)) src/i; flags %RMEM;
234 rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xb6+w/8-1)) !RRM%(RM)($dst,$src);
235 fold MOVZX_%(RN(v))%(MN(w))}
236 MOVSX_%(RN(v))%(RNMN(rm,w)) //= MOVX_%(RN(v))%(RNMN(rm,w)).14
237 {isa %ISA; ops %R dst/o, %(grm(rm,w)) src/i; flags %RMEM;
238 rex %rm %W dst src; extopcode 0f; coding !parsub(%(0xbe+w/8-1)) !RRM%(RM)($dst,$src);
239 fold MOVSX_%(RN(v))%(MN(w))}
241 ANDN_%(RN(v))%(RN(v))%(RNM(rm,v))
242 {isa bmi1 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags wflags %RMEM;
243 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf2) !RRM%(RM)($dst,$src);
244 fold ANDN_%(RN(v))%(RN(v))M}
245 BEXTR_%(RN(v))%(RNM(rm,v))%(RN(v))
246 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags wflags %RMEM;
247 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
248 fold BEXTR_%(RN(v))M%(RN(v))}
249 BLSI_%(RN(v))%(RNM(rm,v)) //= BMI1B_%(RN(v))%(RNM(rm,v)).3
250 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
251 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(3),$src);
253 BLSMSK_%(RN(v))%(RNM(rm,v)) //= BMI1B_%(RN(v))%(RNM(rm,v)).2
254 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
255 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(2),$src);
256 fold BLSMSK_%(RN(v))M}
257 BLSR_%(RN(v))%(RNM(rm,v)) //= BMI1B%(v)_%(RN(v))%(RNM(rm,v)).1
258 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
259 vex %rm %W dst 0 src 0; extopcode 0f38; coding !par(0xf3) !RRM%(RM)(!sub(1),$src);
261 BZHI_%(RN(v))%(RNM(rm,v))%(RN(v))
262 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags wflags %RMEM;
263 vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
264 fold BZHI_%(RN(v))M%(RN(v))}
265 LZCNT_%(RN(v))%(RNM(rm,v))
266 {isa lzcnt %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
267 pref f3 %P; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbd) !RRM%(RM)($dst,$src);
268 fold LZCNT_%(RN(v))M}
269 TZCNT_%(RN(v))%(RNM(rm,v))
270 {isa bmi1 %ISA; ops %R dst/o, %RRM src/i; flags wflags %RMEM;
271 pref f3; rex %rm %W dst src; extopcode 0f; coding !parsub(0xbc) !RRM%(RM)($dst,$src);
272 fold TZCNT_%(RN(v))M}
273 MULX_%(RN(v))%(RN(v))%(RNM(rm,v))
274 {isa bmi2 %ISA; ops %R dst/o, %R src1/o, %RRM src/i, %R{%(EDX(v))} src2/i; flags %RMEM;
275 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf6) !RRM%(RM)($dst,$src);
276 fold MULX_%(RN(v))%(RN(v))M; commute src1<->src2}
277 PDEP_%(RN(v))%(RN(v))%(RNM(rm,v))
278 {isa bmi2 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags %RMEM;
279 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
280 fold PDEP_%(RN(v))%(RN(v))M}
281 PEXT_%(RN(v))%(RN(v))%(RNM(rm,v))
282 {isa bmi2 %ISA; ops %R dst/o, %R src1/i, %RRM src/i; flags %RMEM;
283 pref f3; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf5) !RRM%(RM)($dst,$src);
284 fold PEXT_%(RN(v))%(RN(v))M}
285 RORX_%(RN(v))%(RNM(rm,v))I
286 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, i8 imm/i; flags %RMEM;
287 pref f2; vex %rm %W 0 dst src 0; extopcode 0f3a; coding !parsub(0xf0) !RRM%(RM)($dst,$src) !I8($imm);
288 fold RORX_%(RN(v))MI}
289 SARX_%(RN(v))%(RNM(rm,v))%(RN(v))
290 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
291 pref f3; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
292 fold SARX_%(RN(v))M%(RN(v))}
293 SHLX_%(RN(v))%(RNM(rm,v))%(RN(v))
294 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
295 pref 66; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
296 fold SHLX_%(RN(v))M%(RN(v))}
297 SHRX_%(RN(v))%(RNM(rm,v))%(RN(v))
298 {isa bmi2 %ISA; ops %R dst/o, %RRM src/i, %R src1/i; flags %RMEM;
299 pref f2; vex %rm %W src1 dst src 0; extopcode 0f38; coding !parsub(0xf7) !RRM%(RM)($dst,$src);
300 fold SHRX_%(RN(v))M%(RN(v))}
303 {isa %ISA; ops %R dst/o, i8* src/i;
304 rex rm %W dst src; coding !par(0x8d) !RRMM($dst,$src)}
307 {isa %ISA; ops %R dst/io;
308 pref %P; rex rr %W 0 dst; extopcode 0f; coding !par(0xc8)+($dst&7)}
309 LEA_%(RN(v))%(RN(v))%(RN(v))II
310 {isa %ISA; ops %R dst/o, %R src1/i, %R src2/i, i8 mul2/i, i32 imm/i;
311 rexlea %W dst src1 src2; coding !par(0x8d) !RRMLEA($dst,$src1,$src2,$mul2,$imm)}
313 {isa rdrand %ISA; ops %R dst/o; flags wflags;
314 pref %P; rex rr %W 0 dst; extopcode 0f; coding !par(0xc7) !RRMR(!sub(6),$dst)}
316 {isa x64; ops %R dst/o;
317 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(0),$dst)}
319 {isa x64; ops %R dst/o;
320 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(1),$dst)}
322 {isa x64; ops %R dst/i;
323 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(2),$dst)}
325 {isa x64; ops %R dst/i;
326 pref f3; rex rr %W 0 dst; extopcode 0f; coding !par(0xae) !RRMR(!sub(3),$dst)}
330 MOVSX_D%(RNMN(rm,32))
331 {isa x64only; ops GR64 dst/o, %(grm(rm,32)) src/i; flags %RMEM;
332 rex %rm 1 dst src; coding !par(%(0x63)) !RRM%(RM)($dst,$src);
334 DIV_%(RNMN(rm,8)) //= DIVREM8_%(RM).6
335 {isa ; ops GR8{al} quot/o, GR8{ah} rem/o, GR16{ax} div/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
336 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(6),$src);
338 IDIV_%(RNMN(rm,8)) //= DIVREM8_%(RM).7
339 {isa ; ops GR8{al} quot/o, GR8{ah} rem/o, GR16{ax} div/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
340 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(7),$src);
342 MUL_%(RNMN(rm,8)) //= XMUL8_%(RM).4
343 {isa ; ops GR16{ax} pr/o, GR8{al} src2/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
344 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(4),$src);
345 fold MUL_M8; commute src<->src2}
346 IMUL_%(RNMN(rm,8)) //= XMUL8_%(RM).5
347 {isa ; ops GR16{ax} pr/o, GR8{al} src2/i, %(grm(rm,8)) src/i; flags wflags %RMEM;
348 rex %rm 0 0 src; coding !par(0xf6) !RRM%(RM)(!sub(5),$src);
349 fold IMUL_M8; commute src<->src2}
350 | for l,w in ipairs(ccs) do
351 SET%(w)_%(RNMN(rm,8)) //= SET_%(RM).%(l-1)
352 {isa ; ops %(grm(rm,8)) dst/%(ROMI); flags subflags %WMEM;
353 rex %rm 0 0 dst; extopcode 0f; coding !parsub(%(0x90+l-1)) !RRM%(RM)(0,$dst);
356 //DISABLED: IJMP_%(RNMN(rm,32))_x32 //= IJMP%(RM)_x32.4
357 // {isa x32only; ops %(grm(rm,32)) src/i; flags cf_jmp %RMEM;
358 // rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(4),$src);
359 // fold IJMP_M32_x32}
360 //DISABLED: IJMP_%(RNMN(rm,64))_x64 //= IJMP%(RM)_x64.4
361 // {isa x64only; ops %(grm(rm,64)) src/i; flags cf_jmp %RMEM;
362 // rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(4),$src);
363 // fold IJMP_M64_x64}
364 //DISABLED: ICALL_%(RNMN(rm,32))_x32 //= ICALL%(RM)_x32.2
365 // {isa x32only; ops %(grm(rm,32)) src/i; flags cf_call %RMEM;
366 // rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(2),$src);
367 // fold ICALL_M32_x32}
368 //DISABLED: ICALL_%(RNMN(rm,64))_x64 //= ICALL%(RM)_x64.2
369 // {isa x64only; ops %(grm(rm,64)) src/i; flags cf_call %RMEM;
370 // rex %rm 0 0 src; coding !par(0xff) !RRM%(RM)(!sub(2),$src);
371 // fold ICALL_M64_x64}
374 | ------------------------------------
376 {isa x64; ops GR64 dst/o, i32 imm/i;
377 rex rr 1 0 dst; coding !par(0xc7) !RRMR(!sub(0),$dst) !I32($imm);
380 {isa x64; ops GR64 dst/o, i32 imm/i;
381 rex rr 0 0 dst; coding !par(0xb8)+($dst&7) !I32($imm)}
382 | ------------------------------------
383 INC_H_x32 //= INCDEC_H_x32.0
384 {isa x32only; ops GR16 dst/io; flags wflag_c;
385 pref 66; rex rr 0 0 dst; coding !parsub(0x40)+($dst&7);
387 DEC_H_x32 //= INCDEC_H_x32.8
388 {isa x32only; ops GR16 dst/io; flags wflag_c;
389 pref 66; rex rr 0 0 dst; coding !parsub(0x48)+($dst&7);
391 INC_W_x32 //= INCDEC_W_x32.0
392 {isa x32only; ops GR32 dst/io; flags wflag_c;
393 rex rr 0 0 dst; coding !parsub(0x40)+($dst&7);
395 DEC_W_x32 //= INCDEC_W_x32.8
396 {isa x32only; ops GR32 dst/io; flags wflag_c;
397 rex rr 0 0 dst; coding !parsub(0x48)+($dst&7);
399 CBW {ops GR16{ax} dst/o, GR8{al} src/i; pref 66; rex rr 0 0 0; coding !par(0x98)}
400 CWDE {ops GR32{eax} dst/o, GR16{ax} src/i; rex rr 0 0 0; coding !par(0x98)}
401 CDQE {ops GR64{rax} dst/o, GR32{eax} src/i; rex rr 1 0 0; coding !par(0x98); isa x64}
402 CWD {ops GR16{dx} dst/o, GR16{ax} src/i; pref 66; rex rr 0 0 0; coding !par(0x99)}
403 CDQ {ops GR32{edx} dst/o, GR32{eax} src/i; rex rr 0 0 0; coding !par(0x99)}
404 CQO {ops GR64{rdx} dst/o, GR64{rax} src/i; rex rr 1 0 0; coding !par(0x99); isa x64}
405 //DISABLED: PAUSE {} {pref f3; coding !parsub(0x90)}
406 CLC {flags wflag_c; coding !parsub(0xf8)}
407 //CLD {flags wflag_d; coding !parsub(0xfc)}
408 //CLI {flags wflag_i; coding !parsub(0xfa)}
409 CMC {flags rflag_c wflag_c; coding !parsub(0xf5)}
410 STC {flags wflag_c; coding !parsub(0xf9)}
411 //STD {flags wflag_d; coding !parsub(0xfd)}
412 //STI {flags wflag_i; coding !parsub(0xfb)}
413 CLFLUSH_M8 {isa clflush; ops i8* src/i; flags rwmem; rex rm 0 0 src; extopcode 0f; coding !par(0xae) !RRMM(!sub(7),$src)}
414 LAHF {ops GR8{ah} dst/o; flags rflags; coding !parsub(0x9f)}
415 SAHF {ops GR8{ah} dst/i; flags wflags; coding !parsub(0x9e)}
416 LFENCE {isa sse2; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(5),0)}
417 MFENCE {isa sse2; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(6),0)}
418 SFENCE {isa sse; flags rwmem; rex rr 0 0 0; extopcode 0f; coding !par(0xae) !RRMR(!sub(7),0)}
419 //MONITOR_RAX_ECX_EDX
420 // {isa monitor; ops GR32{eax} dst/i, GR32{ecx} ext/i, GR32{edx} hint/i; flags rwmem;
421 // extopcode 0f; coding 0x01 !parsub(0xc8)} //--0xc8
423 // {isa monitor; ops GR32{ecx} ext/i, GR32{eax} hint/i; flags rwmem;
424 // extopcode 0f; coding 0x01 !parsub(0xc9)} //--0xc9
426 //DISABLED: LOCK {coding !parsub(0xf0)}
427 //RDPMC {isa rdpmc; ops GR32{edx} hi/o, GR32{eax} lo/o, GR32{ecx} sel/i; extopcode 0f; coding !parsub(0x33)}
428 //xacquire is ignored if hle is not present
429 //useable with lock_{add,adc,and,btc,btr,bts,cmpxchg,cmpxchg8b,dec,inc,neg,not,or,sbb,sub,xor,xadd,xchg}_mr,
430 //useable with xchg_mr
431 //DISABLED: XACQUIRE {isa hle; coding !parsub(0xf2)}
432 //xrelease is ignored if hle is not present
433 //useable with lock_{add,adc,and,btc,btr,bts,cmpxchg,cmpxchg8b,dec,inc,neg,not,or,sbb,sub,xor,xadd,xchg}_mr,
434 //useable with xchg_mr, mov_mr, mov_mi
435 //DISABLED: XRELEASE {isa hle; coding !parsub(0xf3)}
436 // resume operation at fallback address of outermost xbegin fallback address, imm is provided as EAX[31:24]
437 //DISABLED: XABORT_I {isa rtm; ops i8 imm/i; rex rr 0 0 0; coding !par(0xc6) !RRMR(!sub(7),0) !I8($imm)}
438 //DISABLED: XBEGIN_REL16 {isa rtm; ops BB fback/i; flags cf_jmp cf_fallthru; pref 66; coding !par(0xc7) !RRMR(!sub(7),0) !REL16($fback)}
439 //DISABLED: XBEGIN_REL32 {isa rtm; ops BB fback/i; flags cf_jmp cf_fallthru; coding !par(0xc7) !RRMR(!sub(7),0) !REL32($fback)}
440 //DISABLED: XEND {isa rtm; extopcode 0f; coding 0x01 !parsub(0xd5)}
441 //DISABLED: XTEST {isa rtm; flags wflags; extopcode 0f; coding 0x01 !parsub(0xd6)}
444 {isa avx; ops GR32{edx} hi/o, GR32{eax} lo/o, GR32{ecx} sel/i; extopcode 0f; coding !par(1) !RRMR(!sub(2),0)}
447 {ops GR32{eax} r1/io, GR32{ebx} r2/o, GR32{ecx} r3/io, GR32{edx} r4/o; extopcode 0f; coding !parsub(0xa2)}
449 {isa rdtsc; ops GR32{eax} lo/o, GR32{edx} hi/o; extopcode 0f; coding !parsub(0x31)}
451 {isa rdtscp; ops GR32{eax} lo/o, GR32{edx} hi/o, GR32{ecx} aux/o; extopcode 0f; coding 0x01 !parsub(0xf9)}
453 | --------------------------
454 | for l,w in ipairs(ccs) do
455 J%(w)_BB_FT //= JCC_BB_FT.%(l-1)
456 {ops BB tgt/i, BB ft/i; flags cf_jmp cf_fallthru subflags;
457 extopcode 0f; coding !parsub(%(0x80+l-1)) !REL32_8_JCC($tgt) }
459 JMP_BB {ops BB tgt/i; flags cf_jmp; coding !par(0xe9) !REL32_8_JMP($tgt)}
460 JMP_FT {ops BB ft/i; flags cf_fallthru }
461 RET {ops GR32{esp} sp/io; flags cf_ret; coding !par(0xc3) }
462 //DISABLED: RET_AMD {ops GR32{esp} sp/io; flags cf_ret; pref f3; coding !par(0xc3)}
463 RET_I {ops GR32{esp} sp/io, i16 imm/i; flags cf_ret; coding !par(0xc2) !I16($imm) }
464 PUSH_I {ops GR32{esp} sp/io, i32 imm/i; flags usemem; coding !par(0x68) !I32_8($imm) }
465 //DISABLED: CALL_I_x32 {isa x32only; ops GR32{esp} sp/io, i32 tgt/i; flags cf_call; coding !par(0xe8) !REL32($tgt) }
466 //DISABLED: CALL_I_x64 {isa x64; ops GR32{esp} sp/io, i64 tgt/i; flags cf_call; coding !par(0xe8) !REL32($tgt) }
468 {isa x32only; ops GR32{esp} sp/io, GR32 dst/i; flags usemem;
469 rex rr 0 0 dst; coding !par(0x50)+($dst&7);
472 {isa x32only; ops GR32{esp} sp/io, GR32 dst/o; flags usemem;
473 rex rr 0 0 dst; coding !par(0x58)+($dst&7);
476 {isa x32only; ops GR32{esp} sp/io, i32* dst/i; flags usemem rmem;
477 rex rm 0 0 dst; coding !par(0xff) !RRMM(!sub(6),$dst) }
479 {isa x32only; ops GR32{esp} sp/io, i32* dst/i; flags usemem wmem;
480 rex rm 0 0 dst; coding !par(0x8f) !RRMM(!sub(0),$dst) }
482 {isa x64only; ops GR32{esp} sp/io, GR64 dst/i; flags usemem;
483 rex rr 0 0 dst; coding !par(0x50)+($dst&7);
486 {isa x64only; ops GR32{esp} sp/io, GR64 dst/o; flags usemem;
487 rex rr 0 0 dst; coding !par(0x58)+($dst&7);
490 {isa x64only; ops GR32{esp} sp/io, i64* dst/i; flags usemem rmem;
491 rex rm 0 0 dst; coding !par(0xff) !RRMM(!sub(6),$dst) }
493 {isa x64only; ops GR32{esp} sp/io, i32* dst/i; flags usemem wmem;
494 rex rm 0 0 dst; coding !par(0x8f) !RRMM(!sub(0),$dst) }
495 | --EMMS {} { parm 0x77; coding !par() : pref 0x0f }{ mmx }
497 {ops GR32{esp} sp/io; flags rflags usemem;
498 coding !parsub(0x9c)}
500 {ops GR32{esp} sp/io; flags wflags usemem;
501 coding !parsub(0x9d)}
503 CMPXCHG8B_M64_EDX_EAX_ECX_EBX
504 {isa ; ops i64* dst/i, GR32{eax} v1lo/io, GR32{edx} v1hi/io, GR32{ebx} v2lo/i, GR32{ecx} v2hi/i; flags wflags, wmem;
505 pref ; rex rm 0 0 dst; extopcode 0f; coding !par(0xc7) !RRMM(!sub(1),$dst)}
506 CMPXCHG16B_M128_RDX_RAX_RCX_RBX
507 {isa x64; ops i64* dst/i, GR64{rax} v1lo/io, GR64{rdx} v1hi/io, GR64{rbx} v2lo/i, GR64{rcx} v2hi/i; flags wflags, wmem;
508 pref ; rex rm 1 0 dst; extopcode 0f; coding !par(0xc7) !RRMM(!sub(1),$dst)}
509 | ------------------------------------
510 | function SDBITS(SD) return SD == "S" and 32 or 64 end -- scalar element width: "S" (single) = 32, otherwise (double) = 64
511 | function vreg(x,n) return "VR"..(n >= 128 and n or 128) end -- vector register class name; never narrower than VR128
512 | function vmem(x,n) return gmem(n) end -- full-width vector memory operand type
513 | function vmems(x,n) return vmem(x,SDBITS(x)) end -- scalar-sized memory operand (32- or 64-bit) for S/D forms
514 | function vrm(rm,x,n) return rrm(rm,vreg(x,n),vmem(x,n)) end -- register class or full-width memory operand, per rm mode
515 | function vrms(rm,x,n) return rrm(rm,vreg(x,n),vmems(x,n)) end -- register class or scalar-sized memory operand, per rm mode
516 | local _XN = {[8] = "X", [16] = "X", [32] = "X", [64] = "X", [128] = "X", [256] = "Y", [512] = "Z"} -- register letter: X up to 128-bit, Y = 256, Z = 512
517 | local function V(w) return w[1] == "avx" and "V" or "" end -- "V" mnemonic prefix for AVX forms, empty for SSE
518 | local function XN(w) return _XN[w[2]] or "?" end -- register letter for this entry's vector width w[2]
519 | local function XNH(w,h) return _XN[h and w[2]/2 or w[2]]end -- register letter; half the width when h is truthy
520 | local function XNM(w) return rrm(w[3],XN(w),"M") end -- register letter for rr forms, plain "M" for rm forms
521 | local function MH(w,h,f) return (h and ("M"..(w[2]/2))) or (f and "M"..w[2]) or "M" end -- memory suffix: half-width if h, full-width if f, else plain "M"
522 | local function XNMH(w,h,f) return rrm(w[3],XNH(w,h),MH(w,h,f)) end -- register letter (optionally halved) for rr, MH() suffix for rm
523 | local function XNMN(w,h) return rrm(w[3],XN(w),(h and "M"..w[2] or "M")) end -- register letter for rr; sized memory suffix only when h
524 | local function SN(SD) return "M"..SDBITS(SD) end -- scalar memory suffix: "M32" for S, "M64" for D
525 | local function XSN(w,SD) return rrm(w[3],"X",SN(SD)) end -- "X" for rr forms, scalar memory suffix for rm forms
526 | local function XNSN(w,SD) return rrm(w[3],XN(w),SN(SD)) end -- width register letter for rr, scalar memory suffix for rm
527 | local function X(w) return w[1] == "avx" and XN(w) or "" end -- extra register letter emitted only in AVX (3-operand) names
528 | local function XM(w) return rrm(w[3],"X",w[2] == 128 and "M" or "M128") end -- "X" for rr; memory suffix is plain "M" at 128-bit, "M128" otherwise
529 | local function XMN(w,n) return rrm(w[3],"X",w[2] == n and "M" or MN(n)) end -- "X" for rr; plain "M" when widths match, sized "M<n>" otherwise
530 | local function YM(w) return rrm(w[3],"Y",w[2] == 256 and "M" or "M256") end -- "Y" for rr; plain "M" at 256-bit, "M256" otherwise
531 | local function DST(w, T) return w[1] == "avx" and (T.." dst/o, "..T.." src1/i") or (T.." dst/io") end -- AVX: separate dst/o and src1/i operands; SSE: single dst/io
533 | function s_66(x) return x == "S" and "" or "66" end -- prefix byte: none for S forms, "66" for D forms
534 | function s_f3(x) return x == "S" and "" or "f3" end -- prefix byte: none for S forms, "f3" for D forms
535 | function s66f2(x) return x == "S" and "66" or "f2" end -- prefix byte: "66" for S forms, "f2" for D forms
536 | function sf366(x) return x == "S" and "f3" or "66" end -- prefix byte: "f3" for S forms, "66" for D forms
537 | function sf3f2(x) return x == "S" and "f3" or "f2" end -- prefix byte: "f3" for S forms, "f2" for D forms
538 | function s5be6(x) return x == "S" and "0x5b" or "0xe6" end -- opcode byte selected by S/D (used by the CVT* codings below)
539 | function dupmem(x,n) return x == "D" and n == 128 and vmems(x,n) or vmem(x,n) end -- 128-bit D forms read only a scalar-sized memory operand; otherwise full width
540 | ------------------------------------
541 | sse = {{"ADD",0x58,1}, {"MUL",0x59,1}, {"MIN",0x5d,1}, {"MAX",0x5f,1}, {"SUB",0x5c,0}, {"DIV",0x5e,0}} -- {name, opcode byte, commute flag}
542 | ssea = {{"SQRT",0x51}} -- unary ops: {name, opcode byte}
543 | ssec = {{"RSQRT",0x52,"S"}, {"RCP",0x53,"S"}} -- {name, opcode byte, "S"}; single-precision-only approximations
544 | sseb = {{"AND",0x54, 1}, {"ANDN",0x55, 0}, {"OR",0x56, 1}, {"XOR",0x57, 0}, {"UNPCKL",0x14,0}, {"UNPCKH",0x15,0}, -- {name, opcode, commute flag[, isa, per-S/D prefix]}
545 | {"ADDSUB",0xd0,0,"sse3",{S="f2",D="66"}}, {"HADD",0x7c,0,"sse3",{S="f2",D="66"}}, {"HSUB",0x7d,0,"sse3",{S="f2",D="66"}},}
546 | ssecmp = {{"EQ",0}, {"LT",1}, {"LE",2}, {"UNORD",3}, {"NEQ",4}, {"NLT",5}, {"NLE",6}, {"ORD",7} } -- CMPccP* predicates: {name, imm8 value}
547 | ssefma = {{"MADD",0x8,true},{"MSUB",0xa,true},{"NMADD",0xc,true},{"NMSUB",0xe,true}, -- FMA ops: {name, opcode nibble, has-scalar-form flag} -- presumably; verify against the (unseen) fma loop
548 | {"MADDSUB",6,false},{"MSUBADD",7,false}}
549 | function VEX(w,wbit,src1,reg,rm)
550 | return (w[1]=="avx" and "vex" or "rex").." "..w[3].." "
551 | ..wbit.." "..(w[1]=="avx" and src1 or "").." "..reg.." "..rm
552 | ..(w[1]=="avx" and (" "..(w[2] == 256 and 1 or 0)) or "")
554 | local function wbit(W) local t = {S=0,D=1}; return t[W] or W or 0 end
555 | function VEX_D0S(w,W) return VEX(w,wbit(W),"0","dst","src") end
556 | function VEX_S0D(w,W) return VEX(w,wbit(W),"0","src","dst") end
557 | function VEX_DS(w,W) return VEX(w,wbit(W),"src1","dst","src") end
558 | function VEX_SD(w,W) return VEX(w,wbit(W),"src1","src","dst") end
559 | function VEX_0D(w,W) return VEX(w,wbit(W),"src1",0,"dst") end
560 | for k,SD in ipairs{"S","D"} do
561 | local D = SD == "D" and 1 or 0
562 | local PP, PS = s_66(SD), sf3f2(SD)
563 | for l,w in ipairs{{"sse",128,"rr"},{"sse",128,"rm"},
564 | {"avx",128,"rr"},{"avx",128,"rm"},{"avx",256,"rr"},{"avx",256,"rm"}} do
565 | local VR, VM, SM = vreg(SD,w[2]), vmem(SD,w[2]), vmems(SD)
566 | local RM, ROMI, RIOMI = rrm(w[3],"R","M"), rrm(w[3], "o", "i"), rrm(w[3], "io", "i")
567 | local VRM, SRM = rrm(w[3],VR,VM), rrm(w[3],VR,SM)
568 | local RMEM, WMEM = rrm(w[3],"","rmem"), rrm(w[3],"","wmem")
569 | local COMMUTE = (w[1]=="avx" and "src1" or "dst").."<->src"
570 | local DS = SD == "D" and "S" or "D"
571 | local ISA = (w[1]=="avx" and w[1]) or (SD == "D" and "sse2" or "sse") -- isa tag for this entry; fixed: compared undefined 'd' (always nil), so every non-AVX double-precision form was tagged "sse" instead of "sse2" (cf. the SD == "D" tests a few lines above)
572 | for m,u in ipairs{{"A",0x28},{"U",0x10}} do
573 | local PM, AU, PAR = PP, u[1], u[2]
574 | if w[3] == "rm" then
575 %(V(w))MOV%(AU)P%(SD)_M%(XN(w))
576 {isa %ISA; ops %VM dst/i, %VR src/i; flags wmem;
577 pref %PM; %(VEX_S0D(w)); extopcode 0f; coding !par(%(PAR+1)) !RRMM($src,$dst)}
579 %(V(w))MOV%(AU)P%(SD)_%(XN(w))%(XNM(w))
580 {isa %ISA; ops %VR dst/o, %VRM src/i; flags wmem;
581 pref %PM; %(VEX_D0S(w)); extopcode 0f; coding !par(%(PAR)) !RRM%(RM)($dst,$src);
582 fold %(V(w))MOV%(AU)P%(SD)_M%(XN(w)) %(V(w))MOV%(AU)P%(SD)_%(XN(w))M}
584 | for m,u in ipairs(sse) do
585 %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w))
586 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
587 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
588 fold %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
590 | for m,u in ipairs(ssea) do
591 %(V(w))%(u[1])P%(SD)_%(XN(w))%(XNM(w))
592 {isa %ISA; ops %VR dst/o, %VRM src/i; flags %RMEM;
593 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
594 fold %(V(w))%(u[1])P%(SD)_%(XN(w))M}
596 | for m,u in ipairs(sseb) do
597 %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w))
598 {isa %ISA %(u[4] or ""); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
599 pref %(u[5] and u[5][SD] or PP); %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
600 fold %(V(w))%(u[1])P%(SD)_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
602 | for m,u in ipairs(ssecmp) do
603 %(V(w))CMP%(u[1])P%(SD)_%(XN(w))%(X(w))%(XNM(w)) //= %(V(w))SIMDCMPP%(SD)_%(XN(w))%(X(w))%(XNM(w)).%(u[2])
604 {isa %ISA; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
605 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !par(0xc2) !RRM%(RM)($dst,$src) !I8(!sub(%(u[2])));
606 fold %(V(w))CMP%(u[1])P%(SD)_%(XN(w))%(X(w))M}
608 %(V(w))SHUFP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
609 {isa %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
610 pref %PP; %(VEX_DS(w)); extopcode 0f; coding !par(0xc6) !RRM%(RM)($dst,$src) !I8($imm);
611 fold %(V(w))SHUFP%(SD)_%(XN(w))%(X(w))MI}
612 %(V(w))ROUNDP%(SD)_%(XN(w))%(XNM(w))I
613 {isa %ISA sse41; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
614 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(%(0x08+D)) !RRM%(RM)($dst,$src) !I8($imm);
615 fold %(V(w))ROUNDP%(SD)_%(XN(w))MI}
616 | if SD == "S" or w[2] == 128 then
617 %(V(w))DPP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
618 {isa %ISA sse41; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
619 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x40+D)) !RRM%(RM)($dst,$src) !I8($imm);
620 fold %(V(w))DPP%(SD)_%(XN(w))%(X(w))MI; commute %COMMUTE}
622 %(V(w))BLENDP%(SD)_%(XN(w))%(X(w))%(XNM(w))I
623 {isa %ISA sse41; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
624 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x0c+D)) !RRM%(RM)($dst,$src) !I8($imm);
625 fold %(V(w))BLENDP%(SD)_%(XN(w))%(X(w))MI}
626 | local VRX1, VRMX1 = vreg(SD,w[2]/(2-D)), vrm(w[3],SD,w[2]/(1+D))
627 | local VRX2, VRMX2 = vreg(SD,w[2]/(1+D)), vrm(w[3],SD,w[2]/(2-D))
628 %(V(w))CVTDQ2P%(SD)_%(XNH(w))%(XNMH(w,SD=="D"))
629 {isa %ISA sse2; ops %VR dst/o, %VRMX1 src/i; flags %RMEM;
630 pref %(s_f3(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
631 fold %(V(w))CVTDQ2P%(SD)_%(XNH(w))%(MH(w,SD=="D"))}
632 %(V(w))CVTP%(SD)2DQ_%(XNH(w,SD=="D"))%(XNMN(w,SD=="D"))
633 {isa %ISA sse2; ops %VRX2 dst/o, %VRM src/i; flags %RMEM;
634 pref %(s66f2(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
635 fold %(V(w))CVTP%(SD)2DQ_%(XNH(w,SD=="D"))%(SD=="D" and w[2]>128 and MN(w[2]) or "M")}
636 %(V(w))CVTTP%(SD)2DQ_%(XNH(w,SD=="D"))%(XNMN(w,SD=="D"))
637 {isa %ISA sse2; ops %VRX2 dst/o, %VRM src/i; flags %RMEM;
638 pref %(sf366(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(s5be6(SD))) !RRM%(RM)($dst,$src);
639 fold %(V(w))CVTTP%(SD)2DQ_%(XNH(w,SD=="D"))%(SD=="D" and w[2]>128 and MN(w[2]) or "M")}
640 %(V(w))CVTP%(SD)2P%(DS)_%(XNH(w,SD=="D"))%(XNMH(w,SD=="S",SD=="D"))
641 {isa %ISA sse2; ops %VRX2 dst/o, %VRMX2 src/i; flags %RMEM;
642 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x5a) !RRM%(RM)($dst,$src);
643 fold %(V(w))CVTP%(SD)2P%(DS)_%(XNH(w,SD=="D"))%(MH(w,SD=="S",SD=="D" and w[2]>128))}
644 | if w[3] == "rr" then
645 %(V(w))MOVMSKP%(SD)_W%(XN(w))
646 {isa %ISA; ops GR32 dst/o, %VR src/i;
647 pref %(s_66(SD)); %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x50) !RRM%(RM)($dst,$src)}
649 | if w[1] == "avx" then
650 VTESTP%(SD)_%(XN(w))%(XNM(w))
651 {isa avx; ops %VR dst/i, %VRM src/i; flags %RMEM wflags;
652 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(0x0e+D)) !RRM%(RM)($dst,$src);
653 fold VTESTP%(SD)_%(XN(w))M; commute dst<->src}
654 | if w[3] == "rm" then
655 VMASKMOVP%(SD)_%(XN(w))%(XN(w))M
656 {isa avx; ops %VR dst/o, %VR src1/i, %VM src/i; flags rmem;
657 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(0x2c+D)) !RRMM($dst,$src)}
658 VMASKMOVP%(SD)_M%(XN(w))%(XN(w))
659 {isa avx; ops %VM dst/i, %VR src1/i, %VR src/i; flags wmem;
660 pref 66; %(VEX_SD(w)); extopcode 0f38; coding !parsub(%(0x2e+D)) !RRMM($src,$dst)}
661 | local vrh = vreg(SD,w[2]/2)
662 | local vrd, vrq = SD=="D" and vrh or VR, SD=="D" and VR or vrh
663 VGATHERDP%(SD)_%(XN(w))M%(XNH(w,SD=="D"))%(XN(w)) //= %(V(w))GATHERP%(SD)_%(XN(w))M%(XN(w)).2
664 {isa avx2; ops %VR dst/io, i%(SDBITS(SD))* src/i, %vrd ix/i, %VR src1/io; flags rmem vsib;
665 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(0x92) !RRMMVSIB($dst,$src,$ix)}
666 VGATHERQP%(SD)_%(XNH(w,SD=="S"))M%(XN(w))%(XNH(w,SD=="S")) //= %(V(w))GATHERP%(SD)_%(XN(w))M%(XN(w)).3
667 {isa avx2; ops %vrq dst/io, i%(SDBITS(SD))* src/i, %VR ix/i, %vrq src1/io; flags rmem vsib;
668 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(0x93) !RRMMVSIB($dst,$src,$ix)}
670 | if SD == "S" or w[2] == 256 then
671 VBROADCASTS%(SD)_%(XN(w))%(XSN(w,SD))
672 {isa %(rrm(w[3],"avx2","avx")); ops %VR dst/o, %(vrm(w[3],SD,SDBITS(SD))) src/i; flags %RMEM;
673 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(0x18+D)) !RRM%(RM)($dst,$src)}
676 VCVTPH2PS_%(XN(w))%(XNMH(w,true))
677 {isa f16c; ops %VR dst/o, %(vrm(w[3],"I",w[2]/2)) src/i; flags %RMEM;
678 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x13) !RRM%(RM)($dst,$src);
679 fold VCVTPH2PS_%(XN(w))%(MH(w,true))}
680 VCVTPS2PH_%(XNMH(w,true))%(XN(w))I
681 {isa f16c; ops %(vrm(w[3],"I",w[2]/2)) dst/%(ROMI), %VR src/i, i8 imm/i; flags %RMEM;
682 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x1d) !RRM%(RM)($src,$dst) !I8($imm);
683 fold VCVTPS2PH_%(MH(w,true))%(XN(w))}
685 VBLENDVP%(SD)_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)) //= VSIMD_AVX_660F3A_40_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)).%(0x0a+D)
686 {isa avx; ops %VR dst/o, %VR src1/i, %VRM src/i, %VR msk/i; flags %RMEM;
687 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x4a+D)) !RRM%(RM)($dst,$src) ($msk<<4);
688 fold VBLENDVP%(SD)_%(XN(w))%(XN(w))M%(XN(w))}
689 VPERMILP%(SD)_%(XN(w))%(XN(w))%(XNM(w)) //= VSIMD_AVX_660F38_00_%(XN(w))%(XN(w))%(XNM(w))%(XN(w)).%(0x0c+D)
690 {isa avx; ops %VR dst/o, %VR src1/i, %VRM src/i; flags %RMEM;
691 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(0x0c+D)) !RRM%(RM)($dst,$src);
692 fold VPERMILP%(SD)_%(XN(w))%(XN(w))M}
693 VPERMILP%(SD)_%(XN(w))%(XNM(w))I //= VSIMD_AVX_660F3A_00_%(XN(w))%(XNM(w))I.%(0x04+D)
694 {isa avx; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
695 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(%(0x04+D)) !RRM%(RM)($dst,$src) !I8($imm);
696 fold VPERMILP%(SD)_%(XN(w))MI}
697 | for m,u in ipairs(ssefma) do
698 | for n,x in ipairs{{"132",0x90},{"213",0xa0},{"231",0xb0}} do
699 VF%(u[1])%(x[1])P%(SD)_%(XN(w))%(XN(w))%(XNM(w))
700 {isa fma3; ops %VR dst/io, %VR src1/i, %VRM src/i; flags %RMEM;
701 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(%(x[2]+u[2])) !RRM%(RM)($dst,$src);
702 fold VF%(u[1])%(x[1])P%(SD)_%(XN(w))%(XN(w))M}
703 | if u[3] and w[2] == 128 then
704 VF%(u[1])%(x[1])S%(SD)_%(XN(w))%(XN(w))%(XNSN(w,SD))
705 {isa fma3; ops %VR dst/io, %VR src1/i, %SRM src/i; flags %RMEM;
706 pref 66; %(VEX_DS(w,D)); extopcode 0f38; coding !parsub(%(x[2]+u[2]+1)) !RRM%(RM)($dst,$src)}
711 BLENDVP%(SD)_X%(XM(w))_XMM0 //= SIMD_SSE41_660F38_10_X%(XM(w))_XMM0.%(0x04+D)
712 {isa sse41; ops %VR dst/io, %VRM src/i, %VR{xmm0} msk/i; flags %RMEM;
713 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(0x14+D)) !RRM%(RM)($dst,$src);
714 fold BLENDVP%(SD)_XM_XMM0}
716 | if w[2] == 128 then
717 | if w[3] == "rr" then
718 %(V(w))MOVS%(SD)_X%(X(w))X
719 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
720 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(0x10) !RRM%(RM)($dst,$src)}
722 %(V(w))MOVHLPS_X%(X(w))X
723 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
724 %(VEX_DS(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
725 %(V(w))MOVLHPS_X%(X(w))X
726 {isa %ISA; ops %(DST(w,VR)), %VR src/i;
727 %(VEX_DS(w)); extopcode 0f; coding !parsub(0x16) !RRM%(RM)($dst,$src)}
730 %(V(w))MOVS%(SD)_%(SN(SD))X
731 {isa %ISA; ops %(vmems(SD)) dst/i, %VR src/i; flags %WMEM;
732 pref %PS; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x11) !RRM%(RM)($src,$dst)}
733 %(V(w))MOVS%(SD)_X%(SN(SD))
734 {isa %ISA; ops %VR dst/o, %SM src/i; flags %RMEM;
735 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x10) !RRM%(RM)($dst,$src)}
736 %(V(w))MOVLP%(SD)_X%(X(w))M64 //= %(V(w))MOVLORHP%(SD)_RM.2
737 {isa %ISA; ops %(DST(w,VR)), %(vmem(SD,64)) src/i; flags %RMEM;
738 pref %(s_66(SD)); %(VEX_DS(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
739 %(V(w))MOVHP%(SD)_X%(X(w))M64 //= %(V(w))MOVLORHP%(SD)_RM.6
740 {isa %ISA; ops %(DST(w,VR)), %(vmem(SD,64)) src/i; flags %RMEM;
741 pref %(s_66(SD)); %(VEX_DS(w)); extopcode 0f; coding !parsub(0x16) !RRM%(RM)($dst,$src)}
742 %(V(w))MOVLP%(SD)_M64X //= %(V(w))MOVLORHP%(SD)_MR.3
743 {isa %ISA; ops %(vmem(SD,64)) dst/i, %VR src/i; flags %WMEM;
744 pref %(s_66(SD)); %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x13) !RRM%(RM)($src,$dst)}
745 %(V(w))MOVHP%(SD)_M64X //= %(V(w))MOVLORHP%(SD)_MR.7
746 {isa %ISA; ops %(vmem(SD,64)) dst/i, %VR src/i; flags %WMEM;
747 pref %(s_66(SD)); %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x17) !RRM%(RM)($src,$dst)}
750 | for m,u in ipairs(ssec) do
751 %(V(w))%(u[1])SS_X%(X(w))%(XNSN(w,SD))
752 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
753 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
755 %(V(w))INSERTPS_X%(X(w))%(XNSN(w,SD))I
756 {isa %ISA sse41; ops %(DST(w,VR)), %SRM src/i, i8 imm/i; flags %RMEM;
757 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x21) !RRM%(RM)($dst,$src) !I8($imm)}
758 %(V(w))EXTRACTPS_%(RNMN(w[3],32))XI
759 {isa %ISA sse41; ops %(grm(w[3],32)) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
760 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x17) !RRM%(RM)($src,$dst) !I8($imm)}
762 | for m,u in ipairs(sse) do
763 %(V(w))%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD))
764 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
765 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
767 | for m,u in ipairs(ssea) do
768 %(V(w))%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD))
769 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
770 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
772 %(V(w))ROUNDS%(SD)_X%(X(w))%(XNSN(w,SD))I
773 {isa %ISA sse41; ops %(DST(w,VR)), %SRM src/i, i8 imm/i; flags %RMEM;
774 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(%(0x0a+D)) !RRM%(RM)($dst,$src) !I8($imm)}
775 | for m,u in ipairs(ssecmp) do
776 %(V(w))CMP%(u[1])S%(SD)_X%(X(w))%(XNSN(w,SD)) //= %(V(w))SIMDCMPS%(SD)_X%(X(w))%(XNSN(w,SD)).%(u[2])
777 {isa %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
778 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !par(0xc2) !RRM%(RM)($dst,$src) !I8(!sub(%(u[2])))}
780 | for m,u in ipairs{{"",0x2f},{"U",0x2e}} do
781 %(V(w))%(u[1])COMIS%(SD)_X%(XNSN(w,SD)) //= %(V(w))SIMDCOMIS%(SD)_X%(XNSN(w,SD)).%(u[2]%16)
782 {isa %ISA; ops %VR dst/i, %SRM src/i; flags %RMEM wflags;
783 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src)}
785 %(V(w))CVTSI2S%(SD)_X%(X(w))%(RNMN(w[3],32))
786 {isa sse2 %ISA; ops %(DST(w,VR)), %(grm(w[3],32)) src/i; flags %RMEM;
787 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
788 %(V(w))CVTSI2S%(SD)_X%(X(w))%(RNMN(w[3],64))
789 {isa sse2 %ISA x64; ops %(DST(w,VR)), %(grm(w[3],64)) src/i; flags %RMEM;
790 pref %PS; %(VEX_DS(w,1)); extopcode 0f; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
791 %(V(w))CVTS%(SD)2SI_W%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_W%(XNSN(w,SD)).13
792 {isa sse2 %ISA; ops GR32 dst/o, %SRM src/i; flags %RMEM;
793 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x2d) !RRM%(RM)($dst,$src)}
794 %(V(w))CVTS%(SD)2SI_D%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_D%(XNSN(w,SD)).13
795 {isa sse2 %ISA x64; ops GR64 dst/o, %SRM src/i; flags %RMEM;
796 pref %PS; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x2d) !RRM%(RM)($dst,$src)}
797 %(V(w))CVTTS%(SD)2SI_W%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_W%(XNSN(w,SD)).12
798 {isa sse2 %ISA; ops GR32 dst/o, %SRM src/i; flags %RMEM;
799 pref %PS; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x2c) !RRM%(RM)($dst,$src)}
800 %(V(w))CVTTS%(SD)2SI_D%(XNSN(w,SD)) //= %(V(w))CVTXS%(SD)2SI_D%(XNSN(w,SD)).12
801 {isa sse2 %ISA x64; ops GR64 dst/o, %SRM src/i; flags %RMEM;
802 pref %PS; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x2c) !RRM%(RM)($dst,$src)}
803 %(V(w))CVTS%(SD)2S%(DS)_X%(X(w))%(XNSN(w,SD))
804 {isa sse2 %ISA; ops %(DST(w,VR)), %SRM src/i; flags %RMEM;
805 pref %PS; %(VEX_DS(w)); extopcode 0f; coding !parsub(0x5a) !RRM%(RM)($dst,$src)}
808 | if w[3] == "rm" then
810 {isa avx; ops %VR dst/o, %(vmem(SD,128)) src/i; flags %RMEM;
811 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x1a) !RRM%(RM)($dst,$src)}
813 VEXTRACTF128_%(XM(w))YI
814 {isa avx; ops %(vrm(w[3],SD,128)) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
815 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x19) !RRM%(RM)($src,$dst) !I8($imm);
816 fold VEXTRACTF128_M128YI}
817 VINSERTF128_YY%(XM(w))I
818 {isa avx; ops %(DST(w,VR)), %(vrm(w[3],SD,128)) src/i, i8 imm/i; flags %RMEM;
819 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x18) !RRM%(RM)($dst,$src) !I8($imm);
820 fold VINSERTF128_YYM128I}
821 VPERM2F128_YY%(YM(w))I
822 {isa avx; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
823 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x06) !RRM%(RM)($dst,$src) !I8($imm);
824 fold VPERM2F128_YYMI}
826 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
827 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x16) !RRM%(RM)($dst,$src);
832 {isa avx2; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
833 pref 66; %(VEX_D0S(w,1)); extopcode 0f3a; coding !parsub(0x01) !RRM%(RM)($dst,$src) !I8($imm);
838 | for m,u in ipairs(ssec) do
839 %(V(w))%(u[1])PS_%(XN(w))%(XNM(w))
840 {isa %ISA; ops %VR dst/o, %VRM src/i; flags %RMEM;
841 pref %PP; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
842 fold %(V(w))%(u[1])PS_%(XN(w))M}
844 | for m,u in ipairs{{"L","0x12"},{"H","0x16"}} do
845 | local VRXM = rrm(w[3],VR,dupmem(SD,w[2]))
846 %(V(w))MOVS%(u[1])DUP_%(XN(w))%(XNM(w)) //= %(V(w))MOVSLORHDUP_%(XN(w))%(XNM(w)).%(u[2]%16)
847 {isa sse3 %ISA; ops %VR dst/o, %VRXM src/i; flags %RMEM;
848 pref f3; %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
849 fold %(V(w))MOVS%(u[1])DUP_%(XN(w))M}
852 | local XD, VRXM = XNMH(w,w[3]=="rm" and w[2]==128), rrm(w[3],VR,dupmem(SD,w[2]))
853 %(V(w))MOVDDUP_%(XN(w))%(XD)
854 {isa sse3 %ISA; ops %VR dst/o, %VRXM src/i; flags %RMEM;
855 pref f2; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x12) !RRM%(RM)($dst,$src)}
860 | ------------------------------------
861 | mmx = {{"ADDB",0xfc,1}, {"ADDSB",0xec,1}, {"ADDUSB", 0xdc,1},
862 | {"SUBB",0xf8,0}, {"SUBSB",0xe8,0}, {"SUBUSB", 0xd8,0},
863 | {"CMPEQB",0x74,1}, {"CMPGTB",0x64,0},
864 | {"AND",0xdb,1}, {"ANDN",0xdf,0}, {"OR", 0xeb,1}, {"XOR", 0xef,1},
865 | {"ADDW",0xfd,1}, {"ADDSW",0xed,1}, {"ADDUSW", 0xdd,1},
866 | {"SUBW",0xf9,0}, {"SUBSW",0xe9,0}, {"SUBUSW", 0xd9,0},
867 | {"CMPEQW",0x75,1}, {"CMPGTW",0x65,0},
870 | {"CMPEQD",0x76,1}, {"CMPGTD",0x66,0},
871 | {"MULLW",0xd5,1}, {"MULHW",0xe5,1}, {"MADDWD",0xf5,1},
872 | {"UNPCKLBW",0x60,0}, {"UNPCKHBW",0x68,0}, {"UNPCKLWD",0x61,0}, {"UNPCKHWD",0x69,0},
873 | {"UNPCKLDQ",0x62,0}, {"UNPCKHDQ",0x6a,0},
874 | {"ACKUSWB",0x67,0}, {"ACKSSWB",0x63,0}, {"ACKSSDW",0x6b,0},
875 | {"AVGB",0xe0,1,"mmxext"}, {"AVGW",0xe3,1,"mmxext"},
876 | {"MAXUB",0xde,1,"mmxext"}, {"MAXSW",0xee,1,"mmxext"}, {"MINUB",0xda,1,"mmxext"}, {"MINSW",0xea,1,"mmxext"},
877 | {"MULHUW",0xe4,1,"mmxext"}, {"SADBW",0xf6,1,"mmxext"},
878 | {"ADDQ",0xd4,1,"sse2"}, {"SUBQ",0xfb,0,"sse2"}, {"MULUDQ",0xf4,1,"sse2"},
879 | {"HADDW",0x01,0,"sse3","0f38"}, {"HADDD",0x02,0,"sse3","0f38"}, {"HADDSW", 0x03,0,"sse3","0f38"},
880 | {"HSUBW",0x05,0,"sse3","0f38"}, {"HSUBD",0x06,0,"sse3","0f38"}, {"HSUBSW", 0x07,0,"sse3","0f38"},
881 | {"MADDUBSW",0x04,1,"sse3","0f38"},{"MULHRSW",0x0b,1,"sse3","0f38"},{"SHUFB",0x00,0,"sse3","0f38"},
882 | {"SIGNB",0x08,0,"sse3","0f38"},{"SIGNW",0x09,0,"sse3","0f38"},{"SIGND",0x0a,0,"sse3","0f38"},
883 | {"MAXSB",0x3c,1,"sse41","0f38",true}, {"MAXSD",0x3d,1,"sse41","0f38",true}, {"MAXUW",0x3e,1,"sse41","0f38",true}, {"MAXUD",0x3f,1,"sse41","0f38",true},
884 | {"MINSB",0x38,1,"sse41","0f38",true}, {"MINSD",0x39,1,"sse41","0f38",true}, {"MINUW",0x3a,1,"sse41","0f38",true}, {"MINUD",0x3b,1,"sse41","0f38",true},
885 | {"MULDQ",0x28,1,"sse41","0f38",true}, {"MULLD",0x40,1,"sse41","0f38",true},
886 | {"CMPEQQ",0x29,1,"sse41","0f38",true}, {"CMPGTQ",0x37,0,"sse41","0f38",true},
887 | {"ACKUSDW",0x2b,0,"sse41","0f38",true},
888 | {"UNPCKLQDQ",0x6c,0,nil,nil,true},{"UNPCKHQDQ",0x6d,0,nil,nil,true},
890 | mmx1 = {{"ABSB",0x1c,0,"sse3","0f38"},{"ABSW",0x1d,0,"sse3","0f38"},{"ABSD",0x1e,0,"sse3","0f38"}, -- unary (dst/o, src/i) ops: {name ("P" prepended at emit), opcode, commute flag, extra isa tag, opcode map; 6th field's use is outside this chunk -- TODO confirm}
891 | {"HMINPOSUW",0x41,0,"sse41","0f38",true},} -- HMINPOSUW is emitted only for 128-bit vectors (guard on w[2] == 128 at the emit site)
892 | local mmxsse41b = {
893 | {"SXBW", 0x20, 2}, {"SXBD", 0x21, 4}, {"SXBQ", 0x22, 8}, {"SXWD", 0x23, 2}, {"SXWQ", 0x24, 4}, {"SXDQ", 0x25, 2},
894 | {"ZXBW", 0x30, 2}, {"ZXBD", 0x31, 4}, {"ZXBQ", 0x32, 8}, {"ZXWD", 0x33, 2}, {"ZXWQ", 0x34, 4}, {"ZXDQ", 0x35, 2},
896 | local mmxsll = {{"SLLW",0xf1,0x71,6}, {"SLLD",0xf2,0x72,6}, {"SLLQ",0xf3,0x73,6},
897 | {"SRLW", 0xd1,0x71,2}, {"SRLD", 0xd2,0x72,2}, {"SRLQ",0xd3,0x73,2},
898 | {"SRAW", 0xe1,0x71,4}, {"SRAD", 0xe2,0x72,4},
900 | for l,w in ipairs{{"sse",128,"rr"},{"sse",128,"rm"}, -- w = {encoding family, vector width in bits, "rr" reg-reg / "rm" reg-mem variant}
901 | {"avx",128,"rr"},{"avx",128,"rm"},{"avx",256,"rr"},{"avx",256,"rm"}} do
902 | local VR, VM = vreg("I",w[2]), vmem("I",w[2]) -- integer ("I") vector register / memory operand specs at width w[2] -- helpers defined earlier in the file
903 | -- code with ONLY reg OR memory variants
904 | local RM, ROMI, RIOMI = rrm(w[3],"R","M"), rrm(w[3], "o", "i"), rrm(w[3], "io", "i") -- RM: ModRM coding suffix; ROMI/RIOMI: dst access mode (out / in-out for the reg variant, in for the mem variant)
905 | local VRM, SRM = rrm(w[3],VR,VM), rrm(w[3],VR,SM) -- src operand: vector reg for "rr", memory for "rm" (SM is defined before this chunk -- scalar-memory, presumably)
906 | local RMEM, WMEM = rrm(w[3],"","rmem"), rrm(w[3],"","wmem") -- memory read/write flags; empty for reg-reg variants
907 | local COMMUTE = (w[1]=="avx" and "src1" or "dst").."<->src" -- AVX 3-operand forms commute src1<->src, SSE 2-operand forms dst<->src
908 | local ISA = (w[1]=="avx" and "avx") or "sse2" -- integer SIMD baseline: sse2 for legacy encodings
909 | for m,u in ipairs{{"A",0x7f,0x6f,"66"},{"U",0x7f,0x6f,"f3"}} do
910 %(V(w))MOVDQ%(u[1])_%(XN(w))%(XNM(w))
911 {isa %ISA; ops %VR dst/o, %VRM src/i; flags %RMEM;
912 pref %(u[4]); %(VEX_D0S(w)); extopcode 0f; coding !parsub(%(u[3])) !RRM%(RM)($dst,$src);
913 fold %(V(w))MOVDQ%(u[1])_M%(XN(w)) %(V(w))MOVDQ%(u[1])_%(XN(w))M}
914 | if w[3] == "rm" then
915 %(V(w))MOVDQ%(u[1])_%(XNM(w))%(XN(w))
916 {isa %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
917 pref %(u[4]); %(VEX_S0D(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($src,$dst)}
920 | for m,u in ipairs(mmx) do
921 %(V(w))P%(u[1])_%(XN(w))%(X(w))%(XNM(w))
922 {isa %ISA %(u[4] or ""); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
923 pref 66; %(VEX_DS(w)); extopcode %(u[5] or "0f"); coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
924 fold %(V(w))P%(u[1])_%(XN(w))%(X(w))M; commute %(u[3] > 0 and COMMUTE or "")}
926 | for m,u in ipairs(mmx1) do
927 | if u[1] ~= "HMINPOSUW" or w[2] == 128 then
928 %(V(w))P%(u[1])_%(XN(w))%(XNM(w))
929 {isa %ISA %(u[4] or ""); ops %VR dst/o, %VRM src/i; flags %RMEM;
930 pref 66; %(VEX_D0S(w)); extopcode %(u[5] or "0f"); coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
931 fold %(V(w))P%(u[1])_%(XN(w))M; commute %(u[3] > 0 and COMMUTE or "")}
934 | for m,u in ipairs(mmxsse41b) do
935 | local RMX = RM == "M" and ("M" .. w[2]/u[3]) or XN({0,w[2]/u[3]})
936 %(V(w))PMOV%(u[1])_%(XN(w))%(RMX)
937 {isa %ISA sse41; ops %VR dst/o, %(vrm(w[3],"I",w[2]/u[3])) src/i; flags %RMEM;
938 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
939 fold %(V(w))PMOV%(u[1])_%(XN(w))%("M" .. w[2]/u[3])}
941 | for m,u in ipairs(mmxsll) do
942 %(V(w))P%(u[1])_%(XN(w))%(X(w))%(XNMN({w[1],128,w[3]},w[2] > 128))
943 {isa %ISA; ops %(DST(w,VR)), %(vrm(w[3],"I",128)) src/i; flags %RMEM;
944 pref 66; %(VEX_DS(w)); extopcode 0f; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
945 fold %(V(w))P%(u[1])_%(XN(w))%(X(w))M}
947 | for m,u in ipairs{{"D","66"}, {"LW","f2"}, {"HW","f3"}} do
948 %(V(w))PSHUF%(u[1])_%(XN(w))%(XNM(w))I
949 {isa %ISA; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
950 pref %(u[2]); %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x70) !RRM%(RM)($dst,$src) !I8($imm);
951 fold %(V(w))PSHUF%(u[1])_%(XN(w))MI}
953 %(V(w))PALIGNR_%(XN(w))%(X(w))%(XNM(w))I
954 {isa ssse3 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
955 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x0f) !RRM%(RM)($dst,$src) !I8($imm);
956 fold %(V(w))PALIGNR_%(XN(w))%(X(w))MI}
957 %(V(w))MPSADBW_%(XN(w))%(X(w))%(XNM(w))I
958 {isa sse41 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
959 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x42) !RRM%(RM)($dst,$src) !I8($imm);
960 fold %(V(w))MPSADBW_%(XN(w))%(X(w))MI}
961 %(V(w))PBLENDW_%(XN(w))%(X(w))%(XNM(w))I
962 {isa sse41 %ISA; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
963 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x0e) !RRM%(RM)($dst,$src) !I8($imm);
964 fold %(V(w))PBLENDW_%(XN(w))%(X(w))MI}
965 %(V(w))PTEST_%(XN(w))%(XNM(w))
966 {isa sse41 %ISA; ops %VR dst/i, %VRM src/i; flags %RMEM wflags;
967 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x17) !RRM%(RM)($dst,$src);
968 fold %(V(w))PTEST_%(XN(w))M; commute dst<->src}
970 | if w[3] == "rm" then
971 %(V(w))MOVNTDQA_%(XN(w))%(XNM(w))
972 {isa sse41 %ISA; ops %VR dst/o, %VM src/i; flags %RMEM;
973 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x2a) !RRM%(RM)($dst,$src)}
974 %(V(w))MOVNTDQ_%(XNM(w))%(XN(w))
975 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
976 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0xe7) !RRM%(RM)($src,$dst)}
977 %(V(w))MOVNTPD_%(XNM(w))%(XN(w))
978 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
979 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x2b) !RRM%(RM)($src,$dst)}
980 %(V(w))MOVNTPS_%(XNM(w))%(XN(w))
981 {isa sse2 %ISA; ops %VM dst/i, %VR src/i; flags %WMEM;
982 pref ; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x2b) !RRM%(RM)($src,$dst)}
983 %(V(w))LDDQU_%(XN(w))%(XNM(w))
984 {isa sse3 %ISA; ops %VR dst/o, %VM src/i; flags %RMEM;
985 pref f2; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xf0) !RRM%(RM)($dst,$src)}
986 | if w[2] == 128 then
988 {isa %ISA; ops i64* dst/i, %VR src/i; flags %WMEM;
989 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0xd6) !RRM%(RM)($src,$dst)}
990 | else -- w[2] == 256
992 {isa avx2; ops %VR dst/o, %(vmem(w[1],128)) src/i; flags %RMEM;
993 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0x5a) !RRM%(RM)($dst,$src)}
995 | if w[1] == "avx" then
996 | local VR0, VM0 = vreg(w[1],w[2]/2), vmem(w[1],w[2]/2)
997 VPGATHERDD_%(XN(w))M%(XN(w))%(XN(w))
998 {isa avx2; ops %VR dst/io, i32* src/i, %VR ix/i, %VR src1/io; flags %RMEM vsib;
999 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x90) !RRM%(RM)VSIB($dst,$src,$ix)}
1000 VPGATHERDQ_%(XN(w))M%(XNH(w,true))%(XN(w))
1001 {isa avx2; ops %VR dst/io, i64* src/i, %VR0 ix/i, %VR src1/io; flags %RMEM vsib;
1002 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x90) !RRM%(RM)VSIB($dst,$src,$ix)}
1003 VPGATHERQD_%(XNH(w,true))M%(XN(w))%(XNH(w,true))
1004 {isa avx2; ops %VR0 dst/io, i32* src/i, %VR ix/i, %VR0 src1/io; flags %RMEM vsib;
1005 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x91) !RRM%(RM)VSIB($dst,$src,$ix)}
1006 VPGATHERQQ_%(XN(w))M%(XN(w))%(XN(w))
1007 {isa avx2; ops %VR dst/io, i64* src/i, %VR ix/i, %VR src1/io; flags %RMEM vsib;
1008 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x91) !RRM%(RM)VSIB($dst,$src,$ix)}
1009 VPMASKMOVD_%(XN(w))%(XN(w))M
1010 {isa avx2; ops %VR dst/o, %VR src1/i, %VM src/i; flags %RMEM;
1011 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x8c) !RRM%(RM)($dst,$src)}
1012 VPMASKMOVQ_%(XN(w))%(XN(w))M
1013 {isa avx2; ops %VR dst/o, %VR src1/i, %VM src/i; flags %RMEM;
1014 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(0x8c) !RRM%(RM)($dst,$src)}
1015 VPMASKMOVD_M%(XN(w))%(XN(w))
1016 {isa avx2; ops %VM dst/i, %VR src1/i, %VR src/i; flags %WMEM;
1017 pref 66; %(VEX_SD(w)); extopcode 0f38; coding !parsub(0x8e) !RRM%(RM)($src,$dst)}
1018 VPMASKMOVQ_M%(XN(w))%(XN(w))
1019 {isa avx2; ops %VM dst/i, %VR src1/i, %VR src/i; flags %WMEM;
1020 pref 66; %(VEX_SD(w,1)); extopcode 0f38; coding !parsub(0x8e) !RRM%(RM)($src,$dst)}
1023 %(V(w))PMOVMSKB_W%(XN(w))
1024 {isa %ISA; ops GR32 dst/o, %VR src/i;
1025 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xd7) !RRM%(RM)($dst,$src)}
1026 | if w[1] == "avx" then
1027 | for m,u in ipairs(mmxsll) do
1028 | --P%(u[1])64_RI { out VRI64 dst : in VRI64 dst,i8 imm } { parm (xx(w[3])) : sub %(w[4]) : rexrr 0 0 dst : code $parm !RRMR($sub,$dst) !I8($imm) : pref 0x0f } {mmx}
1029 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1030 {isa %ISA; ops %VR src1/o, %VR dst/i, i8 imm/i;
1031 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(%(u[3])) !RRM%(RM)(!sub(%(u[4])),$dst) !I8($imm)}
1033 | for m,u in ipairs{{"SLLDQ",7}, {"SRLDQ",3}} do
1034 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1035 {isa %ISA; ops %VR src1/o, %VR dst/i, i8 imm/i;
1036 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(0x73) !RRM%(RM)(!sub(%(u[2])),$dst) !I8($imm)}
1039 | for m,u in ipairs(mmxsll) do
1040 | --P%(u[1])64_RI { out VRI64 dst : in VRI64 dst,i8 imm } { parm (xx(w[3])) : sub %(w[4]) : rexrr 0 0 dst : code $parm !RRMR($sub,$dst) !I8($imm) : pref 0x0f } {mmx}
1041 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1042 {isa %ISA; ops %(DST(w,VR)), i8 imm/i;
1043 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(%(u[3])) !RRM%(RM)(!sub(%(u[4])),$dst) !I8($imm)}
1045 | for m,u in ipairs{{"SLLDQ",7}, {"SRLDQ",3}} do
1046 %(V(w))P%(u[1])_%(XN(w))%(X(w))I
1047 {isa %ISA; ops %(DST(w,VR)), i8 imm/i;
1048 pref 66; %(VEX_0D(w)); extopcode 0f; coding !par(0x73) !RRM%(RM)(!sub(%(u[2])),$dst) !I8($imm)}
1051 | if w[2] == 128 then
1053 {isa %ISA; ops GR32 dst/o, %VR src/i, i8 imm/i;
1054 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !par(0xc5) !RRM%(RM)($dst,$src) !I8($imm)}
1055 %(V(w))MOVQ_%(XN(w))%(RNMN(w[3],64))
1056 {isa %ISA x64; ops %VR dst/o, %(grm(w[3],64)) src/i; flags %RMEM;
1057 pref 66; %(VEX_D0S(w,1)); extopcode 0f; coding !parsub(0x6e) !RRM%(RM)($dst,$src)}
1058 %(V(w))MOVQ_%(RNMN(w[3],64))%(XN(w))
1059 {isa %ISA x64; ops %(grm(w[3],64)) dst/%(ROMI), %VR src/i; flags %WMEM;
1060 pref 66; %(VEX_S0D(w,1)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($src,$dst)}
1063 | if w[1] == "sse" then
1064 PBLENDVB_%(XN(w))%(X(w))%(XNM(w))_XMM0
1065 {isa sse41; ops %VR dst/io, %VRM src/i, %VR{xmm0} msk/i; flags %RMEM;
1066 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x10) !RRM%(RM)($dst,$src);
1067 fold PBLENDVB_%(XN(w))%(X(w))M_XMM0}
1069 | for m,u in ipairs{{"B",0x78,8}, {"W",0x79,16}, {"D",0x58,32}, {"Q",0x59,64}} do
1070 VPBROADCAST%(u[1])_%(XN(w))%(XMN(w,u[3]))
1071 {isa avx2; ops %VR dst/o, %(vrm(w[3],"I",u[3])) src/i; flags %RMEM;
1072 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1073 fold VPBROADCAST%(u[1])_%(XN(w))%(MN(u[3]))}
1075 | for m,u in ipairs{{"SLL",0x47}, {"SRA",0x46}, {"SRL",0x45}} do
1076 VP%(u[1])VD_%(XN(w))%(X(w))%(XNM(w))
1077 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1078 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1079 fold VP%(u[1])VD_%(XN(w))%(X(w))M}
1080 | if u[1] ~= "SRA" then
1081 VP%(u[1])VQ_%(XN(w))%(X(w))%(XNM(w))
1082 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1083 pref 66; %(VEX_DS(w,1)); extopcode 0f38; coding !parsub(%(u[2])) !RRM%(RM)($dst,$src);
1084 fold VP%(u[1])VQ_%(XN(w))%(X(w))M}
1087 VPBLENDD_%(XN(w))%(X(w))%(XNM(w))I
1088 {isa avx2; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
1089 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x02) !RRM%(RM)($dst,$src) !I8($imm);
1090 fold VPBLENDD_%(XN(w))%(X(w))MI}
1091 VPBLENDVB_%(XN(w))%(X(w))%(XNM(w))%(XN(w))
1092 {isa avx2; ops %(DST(w,VR)), %VRM src/i, %VR msk/i; flags %RMEM;
1093 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x4c) !RRM%(RM)($dst,$src) ($msk<<4);
1094 fold VPBLENDVB_%(XN(w))%(X(w))M%(XN(w))}
1096 | if w[2] == 128 then
1097 %(V(w))MOVD_%(XN(w))%(RNMN(w[3],32))
1098 {isa %ISA; ops %VR dst/o, %(grm(w[3],32)) src/i; flags %RMEM;
1099 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x6e) !RRM%(RM)($dst,$src)}
1100 %(V(w))MOVD_%(RNMN(w[3],32))%(XN(w))
1101 {isa %ISA; ops %(grm(w[3],32)) dst/%(ROMI), %VR src/i; flags %WMEM;
1102 pref 66; %(VEX_S0D(w)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($src,$dst)}
1103 %(V(w))MOVQ_%(XN(w))%(XNSN(w, "D"))
1104 {isa %ISA; ops %VR dst/o, %(rrm(w[3],vreg(w[1],64),vmem(w[1],64))) src/i; flags %RMEM;
1105 pref f3; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0x7e) !RRM%(RM)($dst,$src)}
1106 %(V(w))PINSRW_%(XN(w))%(X(w))%(rrm(w[3],RN(32),MN(16)))I
1107 {isa %ISA; ops %(DST(w,VR)), %(rrm(w[3],greg(32),gmem(16))) src/i, i8 imm/i; flags %RMEM;
1108 pref 66; %(VEX_DS(w)); extopcode 0f; coding !parsub(0xc4) !RRM%(RM)($dst,$src) !I8($imm);
1109 fold %(V(w))PINSRW_%(XN(w))%(X(w))%(MN(16))I}
1110 | for m,u in ipairs{{"B",8,"0x14",32,"0x20"}, {"W",16,"0x15",32}, {"D",32,"0x16",32,"0x22"}, {"Q",64,"0x16",64,"0x22"}} do
1111 | local u1x = u[1] == "W" and w[3] == "rr" and "X" or ""
1112 | -- DISABLED sse41 pextrw pattern because we can't check it with nasm
1113 | if #u1x == 0 then -- DISABLED sse41 pextrw pattern because we can't check it with nasm
1114 %(V(w))PEXTR%(u[1])%(u1x)_%(rrm(w[3],RN(u[4]),"M"..u[2]))%(XN(w))I
1115 {isa %ISA sse41 %(u[4]==64 and "x64" or ""); ops %(rrm(w[3],greg(u[4]),gmem(u[2]))) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
1116 pref 66; %(VEX_S0D(w,r64bit(u[4]))); extopcode 0f3a; coding !parsub(%(u[3])) !RRM%(RM)($src,$dst) !I8($imm);
1117 fold %(V(w))PEXTR%(u[1])_%("M"..u[2])%(XN(w))I}
1119 | if u[2] ~= 16 then
1120 %(V(w))PINSR%(u[1])_%(XN(w))%(X(w))%(rrm(w[3],RN(u[4]),"M"..u[2]))I
1121 {isa %ISA sse41 %(u[4]==64 and "x64" or ""); ops %(DST(w,VR)), %(rrm(w[3],greg(u[4]),gmem(u[2]))) src/i, i8 imm/i; flags %RMEM;
1122 pref 66; %(VEX_DS(w,r64bit(u[4]))); extopcode 0f3a; coding !parsub(%(u[5])) !RRM%(RM)($dst,$src) !I8($imm);
1123 fold %(V(w))PINSR%(u[1])_%(XN(w))%(X(w))%("M"..u[2])I}
1126 | if w[3] == "rr" then
1127 %(V(w))MASKMOVDQU_RDI_%(XN(w))%(XN(w))
1128 {isa %ISA sse2; ops GR32{edi} tgt/i, %VR dst/i, %VR src/i; flags wmem;
1129 pref 66; %(VEX_D0S(w)); extopcode 0f; coding !parsub(0xf7) !RRMR($dst,$src)}
1131 %(V(w))PCMPESTRI_ECX_%(XN(w))_EAX_%(XNM(w))_EDX_I
1132 {isa %ISA sse42; ops GR32{ecx} ix/o, %VR dst/i, GR32{eax} dstlen/i, %VRM src/i, GR32{edx} srclen/i, i8 imm/i; flags %RMEM wflags;
1133 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x61) !RRM%(RM)($dst,$src) !I8($imm);
1134 fold %(V(w))PCMPESTRI_ECX_%(XN(w))_EAX_M_EDX_I}
1135 %(V(w))PCMPESTRM_XMM0_%(XN(w))_EAX_%(XNM(w))_EDX_I
1136 {isa %ISA sse42; ops %VR{xmm0} ix/o, %VR dst/i, GR32{eax} dstlen/i, %VRM src/i, GR32{edx} srclen/i, i8 imm/i; flags %RMEM wflags;
1137 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x60) !RRM%(RM)($dst,$src) !I8($imm);
1138 fold %(V(w))PCMPESTRM_XMM0_%(XN(w))_EAX_M_EDX_I}
1139 %(V(w))PCMPISTRI_ECX_%(XN(w))%(XNM(w))I
1140 {isa %ISA sse42; ops GR32{ecx} ix/o, %VR dst/i, %VRM src/i, i8 imm/i; flags %RMEM wflags;
1141 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x63) !RRM%(RM)($dst,$src) !I8($imm);
1142 fold %(V(w))PCMPISTRI_ECX_%(XN(w))MI}
1143 %(V(w))PCMPISTRM_XMM0_%(XN(w))%(XNM(w))I
1144 {isa %ISA sse42; ops %VR{xmm0} ix/o, %VR dst/i, %VRM src/i, i8 imm/i; flags %RMEM wflags;
1145 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0x62) !RRM%(RM)($dst,$src) !I8($imm);
1146 fold %(V(w))PCMPISTRM_XMM0_%(XN(w))MI}
1147 %(V(w))AESDEC_%(XN(w))%(X(w))%(XNM(w))
1148 {isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1149 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xde) !RRM%(RM)($dst,$src);
1150 fold %(V(w))AESDEC_%(XN(w))%(X(w))M }
1151 %(V(w))AESDECLAST_%(XN(w))%(X(w))%(XNM(w))
1152 {isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1153 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdf) !RRM%(RM)($dst,$src);
1154 fold %(V(w))AESDECLAST_%(XN(w))%(X(w))M}
1155 %(V(w))AESENC_%(XN(w))%(X(w))%(XNM(w))
1156 {isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1157 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdc) !RRM%(RM)($dst,$src);
1158 fold %(V(w))AESENC_%(XN(w))%(X(w))M}
1159 %(V(w))AESENCLAST_%(XN(w))%(X(w))%(XNM(w))
1160 {isa aes %(w[1]); ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1161 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0xdd) !RRM%(RM)($dst,$src);
1162 fold %(V(w))AESENCLAST_%(XN(w))%(X(w))M}
| -- (V)AESIMC: InvMixColumns transform, used to prepare decryption round keys.
| -- Encoding 66 [0f38] 0xdb /r; plain dst/src form (no third operand), hence VEX_D0S.
1163 %(V(w))AESIMC_%(XN(w))%(XNM(w))
1164 {isa aes %(w[1]); ops %VR dst/o, %VRM src/i; flags %RMEM;
1165 pref 66; %(VEX_D0S(w)); extopcode 0f38; coding !parsub(0xdb) !RRM%(RM)($dst,$src);
1166 fold %(V(w))AESIMC_%(XN(w))M}
| -- (V)AESKEYGENASSIST: AES round-key generation helper; imm8 is the round constant.
| -- Encoding 66 [0f3a] 0xdf /r ib — same 0xdf opcode byte as AESDECLAST, but in the 0f3a map.
1167 %(V(w))AESKEYGENASSIST_%(XN(w))%(XNM(w))I
1168 {isa aes %(w[1]); ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
1169 pref 66; %(VEX_D0S(w)); extopcode 0f3a; coding !parsub(0xdf) !RRM%(RM)($dst,$src) !I8($imm);
1170 fold %(V(w))AESKEYGENASSIST_%(XN(w))MI}
| -- (V)PCLMULQDQ: carry-less (GF(2)) 64x64->128 multiply; imm8 selects which qwords of each operand.
| -- Encoding 66 [0f3a] 0x44 /r ib; gated on the separate clmul ISA feature, not aes.
1171 %(V(w))PCLMULQDQ_%(XN(w))%(X(w))%(XNM(w))I
1172 {isa clmul %(w[1]); ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
1173 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x44) !RRM%(RM)($dst,$src) !I8($imm);
1174 fold %(V(w))PCLMULQDQ_%(XN(w))%(X(w))MI}
1175 | else -- w[2] == 256
1177 {isa avx2; ops %(DST(w,VR)), %VRM src/i; flags %RMEM;
1178 pref 66; %(VEX_DS(w)); extopcode 0f38; coding !parsub(0x36) !RRM%(RM)($dst,$src);
1181 {isa avx2; ops %VR dst/o, %VRM src/i, i8 imm/i; flags %RMEM;
1182 pref 66; %(VEX_D0S(w,1)); extopcode 0f3a; coding !parsub(0x00) !RRM%(RM)($dst,$src) !I8($imm);
| -- VPERM2I128 (AVX2): select two 128-bit lanes from the two source operands per imm8.
| -- Encoding 66 [0f3a] 0x46 /r ib; 256-bit only (this branch handles w[2] == 256).
1184 VPERM2I128_YY%(YM(w))I
1185 {isa avx2; ops %(DST(w,VR)), %VRM src/i, i8 imm/i; flags %RMEM;
1186 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x46) !RRM%(RM)($dst,$src) !I8($imm);
1187 fold VPERM2I128_YYMI}
| -- VEXTRACTI128 (AVX2): extract a 128-bit lane (selected by imm8) from a YMM source.
| -- Encoding 66 [0f3a] 0x39 /r ib. Note the reversed ($src,$dst) coding order: the
| -- 128-bit destination is the r/m operand, so the store form uses %WMEM/VEX_S0D.
1188 VEXTRACTI128_%(XM(w))YI
1189 {isa avx2; ops %(rrm(w[3],vreg("I",128),vmem("I",128))) dst/%(ROMI), %VR src/i, i8 imm/i; flags %WMEM;
1190 pref 66; %(VEX_S0D(w)); extopcode 0f3a; coding !parsub(0x39) !RRM%(RM)($src,$dst) !I8($imm);
1191 fold VEXTRACTI128_M128YI}
| -- VINSERTI128 (AVX2): insert a 128-bit reg/mem value into a YMM lane selected by imm8.
| -- Encoding 66 [0f3a] 0x38 /r ib; the 128-bit operand is the r/m source.
1192 VINSERTI128_YY%(XM(w))I
1193 {isa avx2; ops %(DST(w,VR)), %(rrm(w[3],vreg("I",128),vmem("I",128))) src/i, i8 imm/i; flags %RMEM;
1194 pref 66; %(VEX_DS(w)); extopcode 0f3a; coding !parsub(0x38) !RRM%(RM)($dst,$src) !I8($imm);
1195 fold VINSERTI128_YYM128I}
| -- VZEROALL / VZEROUPPER: both encode as VEX [0f] 0x77 with no operands; the two
| -- definitions differ only in the last vex field (1 vs 0 — presumably VEX.L; confirm
| -- against the 'vex' directive's field order in the generator).
1198 VZEROALL {isa avx; vex rr 0 0 0 0 1; extopcode 0f; coding !parsub(0x77)}
1199 VZEROUPPER {isa avx; vex rr 0 0 0 0 0; extopcode 0f; coding !parsub(0x77)}
1201 | --MOVDV64_32_RR { out VRI64 dst : in GR32 src } {parm 0x6e : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmx}
1202 | --MOVDV64_32_RM { out VRI64 dst : in i32* src : rmem } {parm 0x6e : rexrm 0 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {mmx}
1203 | --MOVD32_V64_MR { in i32* dst, VRI64 src : wmem } {parm 0x7e : rexrm 0 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {mmx}
1204 | --MOVD32_V64_RR { out GR32 dst : in VRI64 src } {parm 0x7e : rexrr 0 src dst : code $parm !RRMR($src,$dst) : pref 0x0f} {mmx}
1206 | --MOVQV64_64_RR { out VRI64 dst : in GR64 src } {parm 0x6e : rexrr 1 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {x64}
1207 | --MOVQV64_64_RM { out VRI64 dst : in i64* src : rmem } {parm 0x6e : rexrm 1 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {x64}
1208 | --MOVQ64_V64_MR { in i64* dst, VRI64 src : wmem } {parm 0x7e : rexrm 1 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {x64}
1209 | --MOVQ64_V64_RR { out GR64 dst : in VRI64 src } {parm 0x7e : rexrr 1 src dst : code $parm !RRMR($src,$dst) : pref 0x0f} {x64}
1211 | --MOVQV64_MR { in VI64* dst, VRI64 src : wmem } {parm 0x7f : rexrm 0 src dst : code $parm !RRMM($src,$dst) : pref 0x0f} {mmx}
1212 | --MOVQV64_RR { out VRI64 dst : in VRI64 src } {parm 0x6f : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmx}
1213 | --MOVQV64_RM { out VRI64 dst : in VI64* src : rmem } {parm 0x6f : rexrm 0 dst src : code $parm !RRMM($dst,$src) : pref 0x0f} {mmx}
1214 | -- ----------------------------------
1216 | --MOVQ2DQ_RR { out VRI128 dst : in VRI64 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf2} {sse2}
1217 | --MOVDQ2Q_RR { out VRI64 dst : in VRI128 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf3} {sse2}
1218 | --MOVQV128_V64_RR { out VRI128 dst : in VRI64 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf2} {sse2}
1219 | --MOVQV64_V128_RR { out VRI64 dst : in VRI128 src} {parm 0xd6 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f 0xf3} {sse2}
1220 | -- ----------------------------------
1221 | --PMOVMSK64_RR { out GR32 dst : in VRI64 src } {parm 0xd7 : rexrr 0 dst src : code $parm !RRMR($dst,$src) : pref 0x0f} {mmxext}
1222 | --PSHUFW64_RRI { out VRI64 dst : in VRI64 src,i8 imm } { parm 0x70 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
1223 | --PSHUFW64_RMI { out VRI64 dst : in VI64* src,i8 imm : rmem } { parm 0x70 : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
| -- LDMXCSR / STMXCSR: load/store the 32-bit MXCSR control-status register from/to m32.
| -- Both live in the [0f] 0xae group, distinguished by the reg-field subopcode: /2 = load
| -- (reads memory), /3 = store (writes memory). Memory-only forms; no register variant.
1225 LDMXCSR_M32 //= LDMXCSR.2
1226 {isa sse; ops i32* dst/i; flags rmem; rex rm 0 0 dst; extopcode 0f; coding !par(0xae) !RRMM(!sub(2),$dst)}
1227 STMXCSR_M32 //= STMXCSR.3
1228 {isa sse; ops i32* dst/i; flags wmem; rex rm 0 0 dst; extopcode 0f; coding !par(0xae) !RRMM(!sub(3),$dst)}
1230 | --PINSRW64_RRI { out VRI64 dst : in VRI64 dst, GR32 src,i8 imm } { parm 0xc4 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
1231 | --PINSRW64_RMI { out VRI64 dst : in VRI64 dst, i16* src,i8 imm : rmem } { parm 0xc4 : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
1233 | --PEXTRW64_RRI { out GR32 dst : in VRI64 src,i8 imm } { parm 0xc5 : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f } {mmxext}
1234 | -- there exist 64bit versions of pinsrw & pextrw, but they don't make sense, do they?
1236 | --PALIGNR64_RR { out VRI64 dst : in VRI64 dst,VRI64 src,i8 imm } { parm 0x0f : rexrr 0 dst src : code $parm !RRMR($dst,$src) !I8($imm) : pref 0x0f3a } {ssse3}
1237 | --PALIGNR64_RM { out VRI64 dst : in VRI64 dst,VI64* src,i8 imm : rmem } { parm 0x0f : rexrm 0 dst src : code $parm !RRMM($dst,$src) !I8($imm) : pref 0x0f3a } {ssse3}