1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_string.cc,v 1.11 2008/09/06 18:21:29 sshwarts Exp $
3 /////////////////////////////////////////////////////////////////////////
5 // Copyright (c) 2007 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 /////////////////////////////////////////////////////////////////////////
24 #define NEED_CPU_REG_SHORTCUTS 1
27 #define LOG_THIS BX_CPU_THIS_PTR
29 // Make code more tidy with a few macros.
30 #if BX_SUPPORT_X86_64==0
34 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
36 // Compare all pairs of Ai, Bj according to imm8 control
37 static void compare_strings(Bit8u BoolRes
[16][16], BxPackedXmmRegister op1
, BxPackedXmmRegister op2
, Bit8u imm
)
40 unsigned aggregation_operation
= (imm
>> 2) & 3;
42 // All possible comparisons are performed, the individual boolean
43 // results of those comparisons are referred by
44 // BoolRes[op2 element index, op1 element index]
47 case 0: /* unsigned bytes compare */
50 switch (aggregation_operation
) {
51 case 0: /* 'equal' comparison */
54 BoolRes
[j
][i
] = (op1
.xmmubyte(i
) == op2
.xmmubyte(j
));
56 case 1: /* 'ranges' comparison */
58 BoolRes
[j
][i
] = (op1
.xmmubyte(i
) <= op2
.xmmubyte(j
));
60 BoolRes
[j
][i
] = (op1
.xmmubyte(i
) >= op2
.xmmubyte(j
));
67 case 1: /* unsigned words compare */
70 switch (aggregation_operation
) {
71 case 0: /* 'equal' comparison */
74 BoolRes
[j
][i
] = (op1
.xmm16u(i
) == op2
.xmm16u(j
));
76 case 1: /* 'ranges' comparison */
78 BoolRes
[j
][i
] = (op1
.xmm16u(i
) <= op2
.xmm16u(j
));
80 BoolRes
[j
][i
] = (op1
.xmm16u(i
) >= op2
.xmm16u(j
));
87 case 2: /* signed bytes compare */
90 switch (aggregation_operation
) {
91 case 0: /* 'equal' comparison */
94 BoolRes
[j
][i
] = (op1
.xmmsbyte(i
) == op2
.xmmsbyte(j
));
96 case 1: /* 'ranges' comparison */
98 BoolRes
[j
][i
] = (op1
.xmmsbyte(i
) <= op2
.xmmsbyte(j
));
100 BoolRes
[j
][i
] = (op1
.xmmsbyte(i
) >= op2
.xmmsbyte(j
));
107 case 3: /* signed words compare */
110 switch (aggregation_operation
) {
111 case 0: /* 'equal' comparison */
114 BoolRes
[j
][i
] = (op1
.xmm16s(i
) == op2
.xmm16s(j
));
116 case 1: /* 'ranges' comparison */
118 BoolRes
[j
][i
] = (op1
.xmm16s(i
) <= op2
.xmm16s(j
));
120 BoolRes
[j
][i
] = (op1
.xmm16s(i
) >= op2
.xmm16s(j
));
129 static unsigned find_eos32(Bit32s reg32
, Bit8u imm
)
131 if (imm
& 0x1) { // 8 elements
132 if (reg32
> 8 || reg32
< -8) return 8;
133 else return abs(reg32
);
135 else { // 16 elements
136 if (reg32
> 16 || reg32
< -16) return 16;
137 else return abs(reg32
);
#if BX_SUPPORT_X86_64
// 64-bit counterpart of find_eos32: the magnitude of reg64, saturated to the
// number of elements selected by imm[0] (8 when set, 16 when clear).
static unsigned find_eos64(Bit64s reg64, Bit8u imm)
{
  const Bit64s max_elements = (imm & 0x1) ? 8 : 16;

  // Range-check first, so the negation below only ever sees small values.
  if (reg64 > max_elements || reg64 < -max_elements)
    return (unsigned) max_elements;

  // The magnitude is computed by hand rather than with abs(), which may
  // resolve to the int overload when handed a 64-bit argument.
  return (unsigned) ((reg64 < 0) ? -reg64 : reg64);
}
#endif

155 static unsigned find_eos(BxPackedXmmRegister op
, Bit8u imm
)
159 if (imm
& 0x1) { // 8 elements
161 if (op
.xmm16u(i
) == 0) break;
163 else { // 16 elements
165 if (op
.xmmubyte(i
) == 0) break;
171 static bx_bool
override_if_data_invalid(bx_bool val
, bx_bool i_valid
, bx_bool j_valid
, Bit8u imm
)
173 unsigned aggregation_operation
= (imm
>> 2) & 3;
175 switch(aggregation_operation
) {
176 case 0: // 'equal any'
178 if (! i_valid
|| ! j_valid
) // one of the elements is invalid
182 case 2: // 'equal each'
184 if (! j_valid
) return 1; // both elements are invalid
185 else return 0; // only i is invalid
188 if (! j_valid
) return 0; // only j is invalid
192 case 3: // 'equal ordered'
193 if (! i_valid
) { // element i is invalid
197 if (! j_valid
) { // only j is invalid
207 static Bit16u
aggregate(Bit8u BoolRes
[16][16], unsigned len1
, unsigned len2
, Bit8u imm
)
209 unsigned aggregation_operation
= (imm
>> 2) & 3;
210 unsigned num_elements
= (imm
& 0x1) ? 8 : 16;
211 unsigned polarity
= (imm
>> 4) & 3;
216 switch(aggregation_operation
) {
217 case 0: // 'equal any'
218 for(j
=0; j
<num_elements
; j
++) {
220 for(i
=0; i
<num_elements
; i
++) {
221 if (override_if_data_invalid(BoolRes
[j
][i
], (i
< len1
), (j
< len2
), imm
)) {
233 for(j
=0; j
<num_elements
; j
++) {
235 for(i
=0; i
<num_elements
; i
+=2) {
236 if (override_if_data_invalid(BoolRes
[j
][i
], (i
< len1
), (j
< len2
), imm
) &&
237 override_if_data_invalid(BoolRes
[j
][i
+1], (i
+1 < len1
), (j
< len2
), imm
)) {
248 case 2: // 'equal each'
249 for(j
=0; j
<num_elements
; j
++) {
250 if (override_if_data_invalid(BoolRes
[j
][j
], (j
< len1
), (j
< len2
), imm
))
255 case 3: // 'equal ordered'
256 for(j
=0; j
<num_elements
; j
++) {
258 for (i
=0, k
=j
; (i
< num_elements
-j
) && (k
< num_elements
); i
++, k
++) {
259 if (! override_if_data_invalid(BoolRes
[k
][i
], (i
< len1
), (k
< len2
), imm
)) {
277 result
^= (num_elements
== 8) ? 0xFF : 0xFFFF;
281 for (j
=0;j
<num_elements
;j
++)
282 if (j
< len2
) result
^= (1<<j
); // flip the bit
289 #endif // (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
291 // for 3-byte opcodes
292 #if (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)
295 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPESTRM_VdqWdqIb(bxInstruction_c
*i
)
297 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
298 BX_CPU_THIS_PTR
prepareSSE();
300 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
301 Bit8u imm8
= i
->Ib();
303 /* op2 is a register or memory reference */
305 op2
= BX_READ_XMM_REG(i
->rm());
308 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
309 /* pointer, segment address pair */
310 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
313 // compare all pairs of Ai, Bj
314 Bit8u BoolRes
[16][16];
315 compare_strings(BoolRes
, op1
, op2
, imm8
);
316 unsigned len1
, len2
, num_elements
= (imm8
& 0x1) ? 8 : 16;
318 #if BX_SUPPORT_X86_64
320 len1
= find_eos64(RAX
, imm8
);
321 len2
= find_eos64(RDX
, imm8
);
326 len1
= find_eos32(EAX
, imm8
);
327 len2
= find_eos32(EDX
, imm8
);
329 Bit16u result2
= aggregate(BoolRes
, len1
, len2
, imm8
);
331 // As defined by imm8[6], result2 is then either stored to the least
332 // significant bits of XMM0 (zero extended to 128 bits) or expanded
333 // into a byte/word-mask and then stored to XMM0
335 if (num_elements
== 8) {
336 for (int index
= 0; index
< 8; index
++)
337 result
.xmm16u(index
) = (result2
& (1<<index
)) ? 0xffff : 0;
339 else { // num_elements = 16
340 for (int index
= 0; index
< 16; index
++)
341 result
.xmmubyte(index
) = (result2
& (1<<index
)) ? 0xff : 0;
345 result
.xmm64u(1) = 0;
346 result
.xmm64u(0) = (Bit64u
) result2
;
350 if (result2
!= 0) flags
|= EFlagsCFMask
;
351 if (len1
< num_elements
) flags
|= EFlagsSFMask
;
352 if (len2
< num_elements
) flags
|= EFlagsZFMask
;
354 flags
|= EFlagsOFMask
;
355 setEFlagsOSZAPC(flags
);
357 BX_WRITE_XMM_REG(0, result
); /* store result XMM0 */
359 BX_INFO(("PCMPESTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
360 exception(BX_UD_EXCEPTION
, 0, 0);
365 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPESTRI_VdqWdqIb(bxInstruction_c
*i
)
367 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
368 BX_CPU_THIS_PTR
prepareSSE();
370 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
371 Bit8u imm8
= i
->Ib();
373 /* op2 is a register or memory reference */
375 op2
= BX_READ_XMM_REG(i
->rm());
378 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
379 /* pointer, segment address pair */
380 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
383 // compare all pairs of Ai, Bj
384 Bit8u BoolRes
[16][16];
385 compare_strings(BoolRes
, op1
, op2
, imm8
);
386 unsigned len1
, len2
, num_elements
= (imm8
& 0x1) ? 8 : 16;
389 #if BX_SUPPORT_X86_64
391 len1
= find_eos64(RAX
, imm8
);
392 len2
= find_eos64(RDX
, imm8
);
397 len1
= find_eos32(EAX
, imm8
);
398 len2
= find_eos32(EDX
, imm8
);
400 Bit16u result2
= aggregate(BoolRes
, len1
, len2
, imm8
);
402 // The index of the first (or last, according to imm8[6]) set bit of result2
403 // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
405 // The index returned to ECX is of the MSB in result2
406 for (index
=num_elements
-1; index
>=0; index
--)
407 if (result2
& (1<<index
)) break;
408 if (index
< 0) index
= num_elements
;
411 // The index returned to ECX is of the LSB in result2
412 for (index
=0; index
<(int)num_elements
; index
++)
413 if (result2
& (1<<index
)) break;
418 if (result2
!= 0) flags
|= EFlagsCFMask
;
419 if (len1
< num_elements
) flags
|= EFlagsSFMask
;
420 if (len2
< num_elements
) flags
|= EFlagsZFMask
;
422 flags
|= EFlagsOFMask
;
423 setEFlagsOSZAPC(flags
);
426 BX_INFO(("PCMPESTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
427 exception(BX_UD_EXCEPTION
, 0, 0);
432 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPISTRM_VdqWdqIb(bxInstruction_c
*i
)
434 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
435 BX_CPU_THIS_PTR
prepareSSE();
437 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
, result
;
438 Bit8u imm8
= i
->Ib();
440 /* op2 is a register or memory reference */
442 op2
= BX_READ_XMM_REG(i
->rm());
445 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
446 /* pointer, segment address pair */
447 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
450 // compare all pairs of Ai, Bj
451 Bit8u BoolRes
[16][16];
452 compare_strings(BoolRes
, op1
, op2
, imm8
);
454 unsigned num_elements
= (imm8
& 0x1) ? 8 : 16;
455 unsigned len1
= find_eos(op1
, imm8
);
456 unsigned len2
= find_eos(op2
, imm8
);
457 Bit16u result2
= aggregate(BoolRes
, len1
, len2
, imm8
);
459 // As defined by imm8[6], result2 is then either stored to the least
460 // significant bits of XMM0 (zero extended to 128 bits) or expanded
461 // into a byte/word-mask and then stored to XMM0
463 if (num_elements
== 8) {
464 for (int index
= 0; index
< 8; index
++)
465 result
.xmm16u(index
) = (result2
& (1<<index
)) ? 0xffff : 0;
467 else { // num_elements = 16
468 for (int index
= 0; index
< 16; index
++)
469 result
.xmmubyte(index
) = (result2
& (1<<index
)) ? 0xff : 0;
473 result
.xmm64u(1) = 0;
474 result
.xmm64u(0) = (Bit64u
) result2
;
478 if (result2
!= 0) flags
|= EFlagsCFMask
;
479 if (len1
< num_elements
) flags
|= EFlagsSFMask
;
480 if (len2
< num_elements
) flags
|= EFlagsZFMask
;
482 flags
|= EFlagsOFMask
;
483 setEFlagsOSZAPC(flags
);
485 BX_WRITE_XMM_REG(0, result
); /* store result XMM0 */
487 BX_INFO(("PCMPISTRM_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
488 exception(BX_UD_EXCEPTION
, 0, 0);
493 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPISTRI_VdqWdqIb(bxInstruction_c
*i
)
495 #if (BX_SUPPORT_SSE > 4) || (BX_SUPPORT_SSE >= 4 && BX_SUPPORT_SSE_EXTENSION > 0)
496 BX_CPU_THIS_PTR
prepareSSE();
498 BxPackedXmmRegister op1
= BX_READ_XMM_REG(i
->nnn()), op2
;
499 Bit8u imm8
= i
->Ib();
501 /* op2 is a register or memory reference */
503 op2
= BX_READ_XMM_REG(i
->rm());
506 bx_address eaddr
= BX_CPU_CALL_METHODR(i
->ResolveModrm
, (i
));
507 /* pointer, segment address pair */
508 readVirtualDQwordAligned(i
->seg(), eaddr
, (Bit8u
*) &op2
);
511 // compare all pairs of Ai, Bj
512 Bit8u BoolRes
[16][16];
513 compare_strings(BoolRes
, op1
, op2
, imm8
);
514 unsigned num_elements
= (imm8
& 0x1) ? 8 : 16;
517 unsigned len1
= find_eos(op1
, imm8
);
518 unsigned len2
= find_eos(op2
, imm8
);
519 Bit16u result2
= aggregate(BoolRes
, len1
, len2
, imm8
);
521 // The index of the first (or last, according to imm8[6]) set bit of result2
522 // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
524 // The index returned to ECX is of the MSB in result2
525 for (index
=num_elements
-1; index
>=0; index
--)
526 if (result2
& (1<<index
)) break;
527 if (index
< 0) index
= num_elements
;
530 // The index returned to ECX is of the LSB in result2
531 for (index
=0; index
<(int)num_elements
; index
++)
532 if (result2
& (1<<index
)) break;
537 if (result2
!= 0) flags
|= EFlagsCFMask
;
538 if (len1
< num_elements
) flags
|= EFlagsSFMask
;
539 if (len2
< num_elements
) flags
|= EFlagsZFMask
;
541 flags
|= EFlagsOFMask
;
542 setEFlagsOSZAPC(flags
);
545 BX_INFO(("PCMPISTRI_VdqWdqIb: required SSE4.2, use --enable-sse and --enable-sse-extension options"));
546 exception(BX_UD_EXCEPTION
, 0, 0);
550 #endif // (BX_SUPPORT_SSE >= 4) || (BX_SUPPORT_SSE >= 3 && BX_SUPPORT_SSE_EXTENSION > 0)