1 | for Y = [128, 256] do
3 | for X = [ADD, MUL, MIN, MAX, SUB, DIV] do
4 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
5 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
6 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
7 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
10 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/w,XMM2/r}
11 V$(X)P$(M){_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/w,RAX/r}
12 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
13 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
15 | for X = [ADDSUB, HADD, HSUB] do
16 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
17 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
19 | for X = [CMPEQ, CMPLT, CMPLE, CMPUNORD, CMPNEQ, CMPNLT, CMPNLE, CMPORD] do
20 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
21 V$(X)P$(M){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
22 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
23 ?(Y=="128") V$(X)S$(M){_$(VSZ(Y))$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
25 VROUNDP$(M){_$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), 2 {|XMM1/w,XMM2/r}
26 VROUNDP$(M){_$(VSZ(Y))MI} $(VNAME("XMM1",Y)), [RAX], 2 {|XMM1/w,RAX/r}
27 ?(Y=="128") VROUNDS$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)), 2 {|XMM1/w,XMM2/r,XMM3/r}
28 ?(Y=="128") VROUNDS$(M){_$(VSZ(Y))$(VSZ(Y))M$(MVSZ(M))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX], 2 {|XMM1/w,XMM2/r,RAX/r}
29 ?(Y=="128" or M=="S") VDPP$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)), 2 {|XMM1/w,XMM2/r,XMM3/r}
30 ?(Y=="128" or M=="S") VDPP$(M){_$(VSZ(Y))$(VSZ(Y))MI} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX], 2 {|XMM1/w,XMM2/r,RAX/r}
31 ?(Y=="128") VCOMIS$(M){_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/r,XMM2/r,FLAGS/w}
32 ?(Y=="128") VCOMIS$(M){_$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), [RAX] {|XMM1/r,RAX/r,FLAGS/w}
33 ?(Y=="128") VUCOMIS$(M){_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/r,XMM2/r,FLAGS/w}
34 ?(Y=="128") VUCOMIS$(M){_$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), [RAX] {|XMM1/r,RAX/r,FLAGS/w}
35 VTESTP$(M){_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/r,XMM2/r,FLAGS/w}
36 VTESTP$(M){_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/r,RAX/r,FLAGS/w}
38 | for X = [RSQRT, RCP] do
39 V$(X)PS{_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/w,XMM2/r}
40 V$(X)PS{_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/w,RAX/r}
41 ?(Y=="128") V$(X)SS{_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
42 ?(Y=="128") V$(X)SS{_$(VSZ(Y))$(VSZ(Y))M32} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
45 | for X = [ANDPS, ANDNPS, ORPS, XORPS, ANDPD, ANDNPD, ORPD, XORPD, PAND, PANDN, POR, PXOR] do
46 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
47 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
49 | for X = [MOVMSKPS, MOVMSKPD, PMOVMSKB] do
50 V$(X){_W$(VSZ(Y))} ECX, $(VNAME("XMM2",Y)) {|RCX/w,XMM2/r}
52 | for X = [PSLLW, PSLLD, PSLLQ, PSRLW, PSRLD, PSRLQ, PSRAW, PSRAD] do
53 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(128))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",128)) {|XMM1/w,XMM2/r,XMM3/r}
54 V$(X){_$(VSZ(Y))$(VSZ(Y))M$(VOSZ(Y,128))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
55 V$(X){_$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), 1 {|XMM1/w,XMM2/r}
57 | for X = [PSLLDQ, PSRLDQ] do
58 V$(X){_$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), 1 {|XMM1/w,XMM2/r}
61 | for X = [PADDB, PADDSB, PADDUSB, PSUBB, PSUBSB, PSUBUSB, PCMPEQB, PCMPGTB] do
62 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
63 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
65 | for X = [PADDW, PADDSW, PADDUSW, PSUBW, PSUBSW, PSUBUSW, PCMPEQW, PCMPGTW] do
66 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
67 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
69 | for X = [PADDD, PSUBD, PCMPEQD, PCMPGTD, PADDQ, PSUBQ, PCMPEQQ, PCMPGTQ] do
70 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
71 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
73 | for X = [PSADBW, PHADDW, PHADDD, PHADDSW, PHSUBW, PHSUBD, PHSUBSW] do
74 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
75 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
77 | for X = [PMULLW, PMULHW, PMADDWD, PMULHUW, PMULUDQ, PMADDUBSW, PMULHRSW, PMULDQ, PMULLD] do
78 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
79 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
81 | for X = [PMAXUB, PMAXUW, PMAXUD, PMAXSB, PMAXSW, PMAXSD, PMINUB, PMINUW, PMINUD, PMINSB, PMINSW, PMINSD] do
82 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
83 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
85 | for X = [PAVGB, PAVGW, PSIGNB, PSIGNW, PSIGND] do
86 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
87 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
89 | for X = [PABSB, PABSW, PABSD] do
90 V$(X){_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/w,XMM2/r}
91 V$(X){_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/w,RAX/r}
93 VPTEST{_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)) {|XMM1/r,XMM2/r,FLAGS/w}
94 VPTEST{_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/r,RAX/r,FLAGS/w}
95 ?(Y=="128") VPHMINPOSUW{_$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM3/r}
96 ?(Y=="128") VPHMINPOSUW{_$(VSZ(Y))M} $(VNAME("XMM1",Y)), [RAX] {|XMM1/w,RAX/r}
97 VMPSADBW{_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))I} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)), 2 {|XMM1/w,XMM2/r,XMM3/r}
98 VMPSADBW{_$(VSZ(Y))$(VSZ(Y))MI} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX], 2 {|XMM1/w,XMM2/r,RAX/r}
101 | for X = [PCMPESTRI] do
102 V$(X){_ECX_X_EAX_X_EDX_I} XMM1, XMM2, 2 {|XMM1/r,XMM2/r,ECX/w,EAX/r,EDX/r,FLAGS/w}
103 V$(X){_ECX_X_EAX_M_EDX_I} XMM1, [RBX], 2 {|XMM1/r,RBX/r,ECX/w,EAX/r,EDX/r,FLAGS/w}
105 | for X = [PCMPESTRM] do
106 V$(X){_XMM0_X_EAX_X_EDX_I} XMM1, XMM2, 2 {|XMM1/r,XMM2/r,XMM0/w,EAX/r,EDX/r,FLAGS/w}
107 V$(X){_XMM0_X_EAX_M_EDX_I} XMM1, [RBX], 2 {|XMM1/r,RBX/r,XMM0/w,EAX/r,EDX/r,FLAGS/w}
109 | for X = [PCMPISTRI] do
110 V$(X){_ECX_XXI} XMM1, XMM2, 2 {|XMM1/r,XMM2/r,ECX/w,FLAGS/w}
111 V$(X){_ECX_XMI} XMM1, [RBX], 2 {|XMM1/r,RBX/r,ECX/w,FLAGS/w}
113 | for X = [PCMPISTRM] do
114 V$(X){_XMM0_XXI} XMM1, XMM2, 2 {|XMM1/r,XMM2/r,XMM0/w,FLAGS/w}
115 V$(X){_XMM0_XMI} XMM1, [RBX], 2 {|XMM1/r,RBX/r,XMM0/w,FLAGS/w}
119 | for Y = [128, 256] do
120 | for X = [PSLLVD, PSLLVQ, PSRLVD, PSRLVQ, PSRAVD] do
121 V$(X){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
122 V$(X){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
125 | for O = [132, 213, 231] do
126 | for X = [FMADD, FMSUB, FNMADD, FNMSUB] do
127 V$(X)$(O)P$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
128 V$(X)$(O)P$(M){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
129 ?(Y=="128") V$(X)$(O)S$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
130 ?(Y=="128") V$(X)$(O)S$(M){_$(VSZ(Y))$(VSZ(Y))M$(MVSZ(M))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
132 | for X = [FMADDSUB, FMSUBADD] do
133 V$(X)$(O)P$(M){_$(VSZ(Y))$(VSZ(Y))$(VSZ(Y))} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), $(VNAME("XMM3",Y)) {|XMM1/w,XMM2/r,XMM3/r}
134 V$(X)$(O)P$(M){_$(VSZ(Y))$(VSZ(Y))M} $(VNAME("XMM1",Y)), $(VNAME("XMM2",Y)), [RAX] {|XMM1/w,XMM2/r,RAX/r}
140 VAESKEYGENASSIST{_XXI} XMM1, XMM2, 1 {|XMM1/w,XMM2/r}
141 VAESKEYGENASSIST{_XMI} XMM1, [RAX], 1 {|XMM1/w,RAX/r}
142 VAESIMC{_XX} XMM1, XMM2 {|XMM1/w,XMM2/r}
143 VAESIMC{_XM} XMM1, [RAX] {|XMM1/w,RAX/r}
144 | for X = [AESENC, AESENCLAST, AESDEC, AESDECLAST] do
145 V$(X){_XXX} XMM1, XMM2, XMM3 {|XMM1/w,XMM2/r,XMM3/r}
146 V$(X){_XXM} XMM1, XMM2, [RAX] {|XMM1/w,XMM2/r,RAX/r}
148 VPCLMULQDQ{_XXXI} XMM1, XMM2, XMM3, 1 {|XMM1/w,XMM2/r,XMM3/r}
149 VPCLMULQDQ{_XXMI} XMM1, XMM2, [RAX], 1 {|XMM1/w,XMM2/r,RAX/r}