package/emulators/dosbox/ppc-dynrec.patch
diff --git a/include/fpu.h b/include/fpu.h
index 44acd31b..2b7cee5e 100644
--- a/include/fpu.h
+++ b/include/fpu.h
@@ -85,7 +85,7 @@ enum FPU_Round {
 	ROUND_Chop = 3
 };
 
-typedef struct {
+typedef struct FPU_rec {
 	FPU_Reg regs[9];
 	FPU_P_Reg p_regs[9];
 	FPU_Tag tags[9];
--- a/src/cpu/core_dynrec.cpp.orig	2024-08-20 16:10:26.443024003 +0200
+++ b/src/cpu/core_dynrec.cpp	2024-08-20 16:28:12.668109187 +0200
@@ -138,7 +138,7 @@
 #define X86_64 0x02
 #define MIPSEL 0x03
 #define ARMV4LE 0x04
-#define POWERPC 0x04
+#define POWERPC 0x06
 
 #if C_TARGETCPU == X86_64
 #include "core_dynrec/risc_x64.h"
@@ -152,6 +152,11 @@
 #include "core_dynrec/risc_ppc.h"
 #endif
 
+#if !defined(WORDS_BIGENDIAN)
+#define gen_add_LE gen_add
+#define gen_mov_LE_word_to_reg gen_mov_word_to_reg
+#endif
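+// Note: the *_LE helpers are used by the decoder when generated code reads
+// guest (little-endian x86) data directly. On a little-endian host a plain
+// load already has the right byte order, so the aliases above suffice; a
+// big-endian backend like risc_ppc.h defines them itself using
+// byte-reversed loads (lwbrx/lhbrx).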
+
 #include "core_dynrec/decoder.h"
 
 CacheBlockDynRec * LinkBlocks(BlockReturn ret) {
diff --git a/src/cpu/core_dynrec/cache.h b/src/cpu/core_dynrec/cache.h
index 9ae81eb3..faee3167 100644
--- a/src/cpu/core_dynrec/cache.h
+++ b/src/cpu/core_dynrec/cache.h
@@ -145,7 +145,7 @@ public:
 		if (host_readb(hostmem+addr)==(Bit8u)val) return;
 		host_writeb(hostmem+addr,val);
 		// see if there's code where we are writing to
-		if (!host_readb(&write_map[addr])) {
+		if (!write_map[addr]) {
 			if (active_blocks) return; // still some blocks in this page
 			active_count--;
 			if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -162,7 +162,7 @@ public:
 		if (host_readw(hostmem+addr)==(Bit16u)val) return;
 		host_writew(hostmem+addr,val);
 		// see if there's code where we are writing to
-		if (!host_readw(&write_map[addr])) {
+		if (!*(Bit16u*)&write_map[addr]) {
 			if (active_blocks) return; // still some blocks in this page
 			active_count--;
 			if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -171,7 +171,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writew(&invalidation_map[addr],
 				host_readw(&invalidation_map[addr])+0x101);
 #else
@@ -184,7 +184,7 @@ public:
 		if (host_readd(hostmem+addr)==(Bit32u)val) return;
 		host_writed(hostmem+addr,val);
 		// see if there's code where we are writing to
-		if (!host_readd(&write_map[addr])) {
+		if (!*(Bit32u*)&write_map[addr]) {
 			if (active_blocks) return; // still some blocks in this page
 			active_count--;
 			if (!active_count) Release(); // delay page releasing until active_count is zero
@@ -193,7 +193,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writed(&invalidation_map[addr],
 				host_readd(&invalidation_map[addr])+0x1010101);
 #else
@@ -229,7 +229,7 @@ public:
 		addr&=4095;
 		if (host_readw(hostmem+addr)==(Bit16u)val) return false;
 		// see if there's code where we are writing to
-		if (!host_readw(&write_map[addr])) {
+		if (!*(Bit16u*)&write_map[addr]) {
 			if (!active_blocks) {
 				// no blocks left in this page, still delay the page releasing a bit
 				active_count--;
@@ -240,7 +240,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writew(&invalidation_map[addr],
 				host_readw(&invalidation_map[addr])+0x101);
 #else
@@ -258,7 +258,7 @@ public:
 		addr&=4095;
 		if (host_readd(hostmem+addr)==(Bit32u)val) return false;
 		// see if there's code where we are writing to
-		if (!host_readd(&write_map[addr])) {
+		if (!*(Bit32u*)&write_map[addr]) {
 			if (!active_blocks) {
 				// no blocks left in this page, still delay the page releasing a bit
 				active_count--;
@@ -269,7 +269,7 @@ public:
 				invalidation_map=(Bit8u*)malloc(4096);
 				memset(invalidation_map,0,4096);
 			}
-#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY)
+#if !defined(C_UNALIGNED_MEMORY)
 			host_writed(&invalidation_map[addr],
 				host_readd(&invalidation_map[addr])+0x1010101);
 #else
@@ -372,11 +372,11 @@ public:
 		return 0; // none found
 	}
 
-	HostPt GetHostReadPt(Bitu phys_page) {
+	HostPt GetHostReadPt(Bitu phys_page) {
 		hostmem=old_pagehandler->GetHostReadPt(phys_page);
 		return hostmem;
 	}
-	HostPt GetHostWritePt(Bitu phys_page) {
+	HostPt GetHostWritePt(Bitu phys_page) {
 		return GetHostReadPt( phys_page );
 	}
 public:
@@ -392,7 +392,7 @@ private:
 
 	Bitu active_blocks; // the number of cache blocks in this page
 	Bitu active_count; // delaying parameter to not immediately release a page
-	HostPt hostmem;
+	HostPt hostmem;
 	Bitu phys_page;
 };
 
@@ -433,13 +433,13 @@ void CacheBlockDynRec::Clear(void) {
 			wherelink = &(*wherelink)->link[ind].next;
 		}
 		// now remove the link
-		if(*wherelink)
+		if(*wherelink)
 			*wherelink = (*wherelink)->link[ind].next;
 		else {
 			LOG(LOG_CPU,LOG_ERROR)("Cache anomaly. please investigate");
 		}
 	}
-	} else
+	} else
 		cache_addunusedblock(this);
 	if (crossblock) {
 		// clear out the crossblock (in the page before) as well
@@ -464,7 +464,7 @@ static CacheBlockDynRec * cache_openblock(void) {
 		// check for enough space in this block
 		Bitu size=block->cache.size;
 		CacheBlockDynRec * nextblock=block->cache.next;
-		if (block->page.handler)
+		if (block->page.handler)
 			block->Clear();
 		// block size must be at least CACHE_MAXSIZE
 		while (size<CACHE_MAXSIZE) {
@@ -473,7 +473,7 @@ static CacheBlockDynRec * cache_openblock(void) {
 			// merge blocks
 			size+=nextblock->cache.size;
 			CacheBlockDynRec * tempblock=nextblock->cache.next;
-			if (nextblock->page.handler)
+			if (nextblock->page.handler)
 				nextblock->Clear();
 			// block is free now
 			cache_addunusedblock(nextblock);
@@ -500,8 +500,8 @@ static void cache_closeblock(void) {
 	Bitu written=(Bitu)(cache.pos-block->cache.start);
 	if (written>block->cache.size) {
 		if (!block->cache.next) {
-			if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size);
-		} else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size);
+			if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size);
+		} else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size);
 	} else {
 		Bitu new_size;
 		Bitu left=block->cache.size-written;
@@ -553,12 +553,14 @@ static INLINE void cache_addq(Bit64u val) {
 
 static void dyn_return(BlockReturn retcode,bool ret_exception);
 static void dyn_run_code(void);
+static void cache_block_before_close(void);
+static void cache_block_closing(Bit8u* block_start,Bitu block_size);
 
 
 /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */
 #if (C_HAVE_MPROTECT)
 #define PAGESIZE_TEMP PAGESIZE
-#else
+#else
 #define PAGESIZE_TEMP 4096
 #endif
 
@@ -614,18 +616,27 @@ static void cache_init(bool enable) {
 		}
 		// setup the default blocks for block linkage returns
 		cache.pos=&cache_code_link_blocks[0];
+		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
+		// can use up to PAGESIZE_TEMP-64 bytes
+		dyn_run_code();
+		cache_block_before_close();
+		cache_block_closing(cache_code_link_blocks, cache.pos-cache_code_link_blocks);
+
+		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64];
 		link_blocks[0].cache.start=cache.pos;
 		// link code that returns with a special return code
+		// must be less than 32 bytes
 		dyn_return(BR_Link1,false);
-		cache.pos=&cache_code_link_blocks[32];
+		cache_block_before_close();
+		cache_block_closing(link_blocks[0].cache.start, cache.pos-link_blocks[0].cache.start);
+
+		cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32];
 		link_blocks[1].cache.start=cache.pos;
 		// link code that returns with a special return code
+		// must be less than 32 bytes
 		dyn_return(BR_Link2,false);
-
-		cache.pos=&cache_code_link_blocks[64];
-		core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos;
-//		link_blocks[1].cache.start=cache.pos;
-		dyn_run_code();
+		cache_block_before_close();
+		cache_block_closing(link_blocks[1].cache.start, cache.pos-link_blocks[1].cache.start);
 
 		cache.free_pages=0;
 		cache.last_page=0;
diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h
index c8e2a8ef..3352c5fc 100644
--- a/src/cpu/core_dynrec/decoder_basic.h
+++ b/src/cpu/core_dynrec/decoder_basic.h
@@ -502,7 +502,6 @@ static INLINE void dyn_set_eip_end(HostReg reg,Bit32u imm=0) {
 	gen_mov_word_to_reg(reg,&reg_eip,true); //get_extend_word will mask off the upper bits
 	//gen_mov_word_to_reg(reg,&reg_eip,decode.big_op);
 	gen_add_imm(reg,(Bit32u)(decode.code-decode.code_start+imm));
-	if (!decode.big_op) gen_extend_word(false,reg);
 }
 
@@ -995,10 +994,10 @@ skip_extend_word:
 			// succeeded, use the pointer to avoid code invalidation
 			if (!addseg) {
 				if (!scaled_reg_used) {
-					gen_mov_word_to_reg(ea_reg,(void*)val,true);
+					gen_mov_LE_word_to_reg(ea_reg,(void*)val,true);
 				} else {
 					DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				}
 			} else {
 				if (!scaled_reg_used) {
@@ -1006,7 +1005,7 @@ skip_extend_word:
 				} else {
 					DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 				}
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 			return;
 		}
@@ -1047,10 +1046,10 @@ skip_extend_word:
 			if (!addseg) {
 				if (!scaled_reg_used) {
 					MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				} else {
 					DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0);
-					gen_add(ea_reg,(void*)val);
+					gen_add_LE(ea_reg,(void*)val);
 				}
 			} else {
 				if (!scaled_reg_used) {
@@ -1059,7 +1058,7 @@ skip_extend_word:
 					DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0);
 				}
 				ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 			return;
 		}
@@ -1124,11 +1123,11 @@ skip_extend_word:
 			// succeeded, use the pointer to avoid code invalidation
 			if (!addseg) {
 				MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			} else {
 				MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base));
 				ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg);
-				gen_add(ea_reg,(void*)val);
+				gen_add_LE(ea_reg,(void*)val);
 			}
 			return;
 		}
diff --git a/src/cpu/core_dynrec/decoder_opcodes.h b/src/cpu/core_dynrec/decoder_opcodes.h
index 67eaee2c..82bbbbb9 100644
--- a/src/cpu/core_dynrec/decoder_opcodes.h
+++ b/src/cpu/core_dynrec/decoder_opcodes.h
@@ -250,12 +250,12 @@ static void dyn_prep_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP2,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false);
 			return;
 		}
 	}
@@ -287,13 +287,13 @@ static void dyn_mov_word_imm(Bit8u reg) {
 	Bitu val;
 	if (decode.big_op) {
 		if (decode_fetchd_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,true);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true);
 			MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
 	} else {
 		if (decode_fetchw_imm(val)) {
-			gen_mov_word_to_reg(FC_OP1,(void*)val,false);
+			gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false);
 			MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg);
 			return;
 		}
@@ -330,7 +330,7 @@ static void dyn_mov_byte_direct_al() {
 	if (decode.big_addr) {
 		Bitu val;
 		if (decode_fetchd_imm(val)) {
-			gen_add(FC_ADDR,(void*)val);
+			gen_add_LE(FC_ADDR,(void*)val);
 		} else {
 			gen_add_imm(FC_ADDR,(Bit32u)val);
 		}
@@ -1184,11 +1184,8 @@ static void dyn_ret_near(Bitu bytes) {
 	dyn_reduce_cycles();
 
 	if (decode.big_op) gen_call_function_raw((void*)&dynrec_pop_dword);
-	else {
-		gen_call_function_raw((void*)&dynrec_pop_word);
-		gen_extend_word(false,FC_RETOP);
-	}
-	gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true);
+	else gen_call_function_raw((void*)&dynrec_pop_word);
+	gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),decode.big_op);
 
 	if (bytes) gen_add_direct_word(&reg_esp,bytes,true);
 	dyn_return(BR_Normal);
diff --git a/src/cpu/core_dynrec/risc_ppc.h b/src/cpu/core_dynrec/risc_ppc.h
new file mode 100644
index 00000000..156843a6
--- /dev/null
+++ b/src/cpu/core_dynrec/risc_ppc.h
@@ -0,0 +1,901 @@
+/*
+ *  Copyright (C) 2002-2019  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+// some configuring defines that specify the capabilities of this architecture
+// or aspects of the recompiling
+
+// protect FC_ADDR over function calls if necessary
+//#define DRC_PROTECT_ADDR_REG
+
+// try to use non-flags generating functions if possible
+#define DRC_FLAGS_INVALIDATION
+// try to replace _simple functions by code
+#define DRC_FLAGS_INVALIDATION_DCODE
+
+// type with the same size as a pointer
+#define DRC_PTR_SIZE_IM Bit32u
+
+// calling convention modifier
+#define DRC_FC /* nothing */
+#define DRC_CALL_CONV /* nothing */
+
+#define DRC_USE_REGS_ADDR
+#define DRC_USE_SEGS_ADDR
+
+#if defined(_CALL_SYSV)
+// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value)
+#define USE_SDA_BASE
+#endif
+
+// register mapping
+enum HostReg {
+	HOST_R0=0,
+	HOST_R1,
+	HOST_R2,
+	HOST_R3,
+	HOST_R4,
+	HOST_R5,
+	HOST_R6,
+	HOST_R7,
+	HOST_R8,
+	HOST_R9,
+	HOST_R10,
+	HOST_R11,
+	HOST_R12,
+	HOST_R13,
+	HOST_R14,
+	HOST_R15,
+	HOST_R16,
+	HOST_R17,
+	HOST_R18,
+	HOST_R19,
+	HOST_R20,
+	HOST_R21,
+	HOST_R22,
+	HOST_R23,
+	HOST_R24,
+	HOST_R25,
+	HOST_R26, // generic non-volatile (used for inline adc/sbb)
+	HOST_R27, // points to current CacheBlockDynRec (decode.block)
+	HOST_R28, // points to fpu
+	HOST_R29, // FC_ADDR
+	HOST_R30, // points to Segs
+	HOST_R31, // points to cpu_regs
+
+	HOST_NONE
+};
+
+static const HostReg RegParams[] = {
+	HOST_R3, HOST_R4, HOST_R5, HOST_R6,
+	HOST_R7, HOST_R8, HOST_R9, HOST_R10
+};
+
+#if C_FPU
+#include "fpu.h"
+extern struct FPU_rec fpu;
+#endif
+
+#if defined(USE_SDA_BASE)
+extern Bit32u _SDA_BASE_[];
+#endif
+
+// register that holds function return values
+#define FC_RETOP HOST_R3
+
+// register used for address calculations, if the ABI does not
+// state that this register is preserved across function calls
+// then define DRC_PROTECT_ADDR_REG above
+#define FC_ADDR HOST_R29
+
+// register that points to Segs[]
+#define FC_SEGS_ADDR HOST_R30
+// register that points to cpu_regs[]
+#define FC_REGS_ADDR HOST_R31
+
+// register that holds the first parameter
+#define FC_OP1 RegParams[0]
+
+// register that holds the second parameter
+#define FC_OP2 RegParams[1]
+
+// special register that holds the third parameter for _R3 calls (byte accessible)
+#define FC_OP3 RegParams[2]
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA1 FC_OP2
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA2 FC_OP1
+
+// temporary register for LEA
+#define TEMP_REG_DRC HOST_R10
+
+#define IMM(op, regsd, rega, imm)           (((op)<<26)|((regsd)<<21)|((rega)<<16)|(((Bit32u)(imm))&0xFFFF))
+#define EXT(regsd, rega, regb, op, rc)      ((31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)|((op)<<1)|(rc))
+#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs)<<21)|((rega)<<16)|((sh)<<11)|((mb)<<6)|((me)<<1)|(rc))
+
+#define IMM_OP(op, regsd, rega, imm)           cache_addd(IMM(op, regsd, rega, imm))
+#define EXT_OP(regsd, rega, regb, op, rc)      cache_addd(EXT(regsd, rega, regb, op, rc))
+#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc))
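+// Worked example of the encoding scheme: gen_mov_regs below emits
+// EXT_OP(src,dst,src,444,0), i.e. "or dst,src,src" (mr dst,src). For
+// mr r3,r4 this assembles to (31<<26)|(4<<21)|(3<<16)|(4<<11)|(444<<1)
+// = 0x7C832378.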
+
+// move a full register from reg_src to reg_dst
+static void gen_mov_regs(HostReg reg_dst,HostReg reg_src)
+{
+	if (reg_dst != reg_src)
+		EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src)
+}
+
+// move a 16bit constant value into dest_reg
+// the upper 16bit of the destination register may be destroyed
+static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)
+{
+	IMM_OP(14, dest_reg, 0, imm); // li dest,imm
+}
+
+DRC_PTR_SIZE_IM block_ptr;
+
+// Helper for loading addresses
+static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest)
+{
+	Bit32s off;
+
+	if ((Bit16s)addr == addr)
+		return HOST_R0;
+
+	off = addr - (Bit32s)&Segs;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return FC_SEGS_ADDR;
+	}
+
+	off = addr - (Bit32s)&cpu_regs;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return FC_REGS_ADDR;
+	}
+
+	off = addr - (Bit32s)block_ptr;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R27;
+	}
+
+#if C_FPU
+	off = addr - (Bit32s)&fpu;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R28;
+	}
+#endif
+
+#if defined(USE_SDA_BASE)
+	off = addr - (Bit32s)_SDA_BASE_;
+	if ((Bit16s)off == off)
+	{
+		addr = off;
+		return HOST_R13;
+	}
+#endif
+
+	IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha
+	addr = (Bit16s)addr;
+	return dest;
+}
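+// gen_addr picks a base register whose runtime value is already known
+// (Segs, cpu_regs, the current block, fpu, or the small-data area) so the
+// target can be reached with a 16-bit displacement; addr is rewritten to
+// that offset. Only if no base is in range does it load the high half into
+// dest itself (lis) and return dest.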
+
+// move a 32bit constant value into dest_reg
+static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)
+{
+	HostReg ld = gen_addr((Bit32s&)imm, dest_reg);
+	if (imm || ld != dest_reg)
+		IMM_OP(14, dest_reg, ld, imm); // addi dest_reg, ld, imm@l
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword)
+{
+	Bit32s addr = (Bit32s)data;
+	HostReg ld = gen_addr(addr, dest_reg);
+	IMM_OP(dword ? 32 : 40, dest_reg, ld, addr); // lwz/lhz dest, addr@l(ld)
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg
+static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) {
+	Bit32u addr = (Bit32u)data;
+	gen_mov_dword_to_reg_imm(dest_reg, addr);
+	EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest
+}
+
+// move an 8bit constant value into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
+	gen_mov_word_to_reg_imm(dest_reg, imm);
+}
+
+// move an 8bit constant value into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
+	gen_mov_word_to_reg_imm(dest_reg, imm);
+}
+
+// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
+static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword)
+{
+	Bit32s addr = (Bit32s)dest;
+	HostReg ld = gen_addr(addr, HOST_R8);
+	IMM_OP(dword ? 36 : 44, src_reg, ld, addr); // stw/sth src,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data)
+{
+	Bit32s addr = (Bit32s)data;
+	HostReg ld = gen_addr(addr, dest_reg);
+	IMM_OP(34, dest_reg, ld, addr); // lbz dest,addr@l(ld)
+}
+
+// move an 8bit value from memory into dest_reg
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
+	gen_mov_byte_to_reg_low(dest_reg, data);
+}
+
+// move the lowest 8bit of a register into memory
+static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest)
+{
+	Bit32s addr = (Bit32s)dest;
+	HostReg ld = gen_addr(addr, HOST_R8);
+	IMM_OP(38, src_reg, ld, addr); // stb src_reg,addr@l(ld)
+}
+
+// convert an 8bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_byte(bool sign,HostReg reg)
+{
+	if (sign)
+		EXT_OP(reg, reg, 0, 954, 0); // extsb reg, reg
+	else
+		RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, reg, 0, 24, 31
+}
+
+// convert a 16bit word to a 32bit dword
+// the register is zero-extended (sign==false) or sign-extended (sign==true)
+static void gen_extend_word(bool sign,HostReg reg)
+{
+	if (sign)
+		EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg
+	else
+		RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31
+}
+
+// add a 32bit value from memory to a full register
+static void gen_add(HostReg reg,void* op)
+{
+	gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit value from host memory to a full register
+static void gen_add_LE(HostReg reg,void* op)
+{
+	gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24);
+	EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8
+}
+
+// add a 32bit constant value to a full register
+static void gen_add_imm(HostReg reg,Bit32u imm)
+{
+	if ((Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha
+	if ((Bit16s)imm)
+		IMM_OP(14, reg, reg, imm); // addi reg, reg, imm@l
+}
+
+// and a 32bit constant value with a full register
+static void gen_and_imm(HostReg reg,Bit32u imm) {
+	Bits sbit,ebit,tbit,bbit,abit,i;
+
+	// sbit = number of leading 0 bits
+	// ebit = number of trailing 0 bits
+	// tbit = number of total 0 bits
+	// bbit = number of leading 1 bits
+	// abit = number of trailing 1 bits
+
+	if (imm == 0xFFFFFFFF)
+		return;
+
+	if (!imm)
+		return gen_mov_word_to_reg_imm(reg, 0);
+
+	sbit = ebit = tbit = bbit = abit = 0;
+	for (i=0; i < 32; i++)
+	{
+		if (!(imm & (1<<(31-i))))
+		{
+			abit = 0;
+			tbit++;
+			if (sbit == i)
+				sbit++;
+			ebit++;
+		}
+		else
+		{
+			ebit = 0;
+			if (bbit == i)
+				bbit++;
+			abit++;
+		}
+	}
+
+	if (sbit + ebit == tbit)
+	{
+		RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit
+		return;
+	}
+
+	if (sbit >= 16)
+	{
+		IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm
+		return;
+	}
+	if (ebit >= 16)
+	{
+		IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16)
+		return;
+	}
+
+	if (bbit + abit == (32 - tbit))
+	{
+		RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1
+		return;
+	}
+
+	IMM_OP(28, reg, HOST_R0, imm); // andi. r0, reg, imm@l
+	IMM_OP(29, reg, reg, imm>>16); // andis. reg, reg, imm@h
+	EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0
+}
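+// Example: imm=0x00FFFF00 gives sbit=8, ebit=8, tbit=16; sbit+ebit==tbit,
+// so the whole AND collapses to a single rlwinm reg,reg,0,8,23.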
+
+// move a 32bit constant value into memory
+static void gen_mov_direct_dword(void* dest,Bit32u imm) {
+	gen_mov_dword_to_reg_imm(HOST_R9, imm);
+	gen_mov_word_from_reg(HOST_R9, dest, 1);
+}
+
+// move an address into memory (assumes address != NULL)
+static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm)
+{
+	block_ptr = 0;
+	gen_mov_dword_to_reg_imm(HOST_R27, imm);
+	// this will be used to look-up the linked blocks
+	block_ptr = imm;
+	gen_mov_word_from_reg(HOST_R27, dest, 1);
+}
+
+// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value
+static void gen_add_direct_word(void* dest,Bit32u imm,bool dword)
+{
+	HostReg ld;
+	Bit32s addr = (Bit32s)dest;
+
+	if (!dword)
+	{
+		imm &= 0xFFFF;
+		addr += 2;
+	}
+
+	if (!imm)
+		return;
+
+	ld = gen_addr(addr, HOST_R8);
+	IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld)
+	if (dword && (Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha
+	if (!dword || (Bit16s)imm)
+		IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l
+	IMM_OP(dword ? 36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld)
+}
+
+// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value
+static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
+	gen_add_direct_word(dest, -(Bit32s)imm, dword);
+}
+
+// effective address calculation, destination is dest_reg
+// scale_reg is scaled by scale (scale_reg*(2^scale)) and
+// added to dest_reg, then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+	{
+		RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // slwi r8,scale_reg,scale
+		scale_reg = HOST_R8;
+	}
+
+	gen_add_imm(dest_reg, imm);
+	EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled
+}
+
+// effective address calculation, destination is dest_reg
+// dest_reg is scaled by scale (dest_reg*(2^scale)),
+// then the immediate value is added
+static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm)
+{
+	if (scale)
+	{
+		RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // slwi dest,dest,scale
+	}
+
+	gen_add_imm(dest_reg, imm);
+}
+
+// helper function to choose direct or indirect call
+static int INLINE do_gen_call(void *func, Bit32u *pos, bool pad)
+{
+	Bit32s f = (Bit32s)func;
+	Bit32s off = f - (Bit32s)pos;
+
+	// relative branches are limited to +/- ~32MB
+	if (off < 0x02000000 && off >= -0x02000000)
+	{
+		pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func
+		if (pad)
+		{
+			pos[1] = 0x4800000C;                // b 12+
+			pos[2] = pos[3] = IMM(24, 0, 0, 0); // nop
+			return 16;
+		}
+		return 4;
+	}
+
+	pos[0] = IMM(15, HOST_R8, 0, f>>16);   // lis r8,imm@h
+	pos[1] = IMM(24, HOST_R8, HOST_R8, f); // ori r8,r8,imm@l
+	pos[2] = EXT(HOST_R8, 9, 0, 467, 0);   // mtctr r8
+	pos[3] = IMM(19, 0x14, 0, (528<<1)|1); // bctrl
+	return 16;
+}
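+// The direct form relies on "bl" carrying a 26-bit signed displacement
+// (a 24-bit field shifted left by 2), i.e. +/-32MB; targets further away
+// get the four-instruction lis/ori/mtctr/bctrl sequence, which is why
+// callers that may be repatched later pass pad=true to reserve 16 bytes
+// either way.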
+
+// generate a call to a parameterless function
+static void INLINE gen_call_function_raw(void * func,bool fastcall=true)
+{
+	cache.pos += do_gen_call(func, (Bit32u*)cache.pos, fastcall);
+}
+
+// generate a call to a function with paramcount parameters
+// note: the parameters are loaded in the architecture specific way
+// using the gen_load_param_ functions below
+static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false)
+{
+	Bit32u proc_addr=(Bit32u)cache.pos;
+	gen_call_function_raw(func,fastcall);
+	return proc_addr;
+}
+
+// load an immediate value as param'th function parameter
+static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
+	gen_mov_dword_to_reg_imm(RegParams[param], imm);
+}
+
+// load an address as param'th function parameter
+static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
+	gen_load_param_imm(addr, param);
+}
+
+// load a host-register as param'th function parameter
+static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
+	gen_mov_regs(RegParams[param], (HostReg)reg);
+}
+
+// load a value from memory as param'th function parameter
+static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
+	gen_mov_word_to_reg(RegParams[param], (void*)mem, true);
+}
+
+// jump to an address pointed at by ptr, offset is in imm
+static void gen_jmp_ptr(void * ptr,Bits imm=0) {
+	gen_mov_word_to_reg(HOST_R8,ptr,true); // r8 = *(Bit32u*)ptr
+	if ((Bit16s)imm != (Bit32s)imm)
+		IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha
+	IMM_OP(32, HOST_R8, HOST_R8, imm); // lwz r8, imm@l(r8)
+	EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8
+	IMM_OP(19, 0x14, 0, 528<<1); // bctr
+}
+
+// short conditional jump (+-127 bytes) if register is zero
+// the destination is set by gen_fill_branch() later
+static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
+	else
+		IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
+
+	IMM_OP(16, 0x0C, 2, 0); // bc 12,CR0[Z] (beq)
+	return ((Bit32u)cache.pos-4);
+}
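+// In IMM_OP(16, 0x0C, 2, 0) above, 0x0C is the BO field ("branch if CR bit
+// set") and 2 is BI, selecting CR0[EQ]; the 16-bit displacement is left as
+// zero and patched in later by gen_fill_branch.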
+
+// short conditional jump (+-127 bytes) if register is nonzero
+// the destination is set by gen_fill_branch() later
+static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF
+	else
+		IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
+
+	IMM_OP(16, 0x04, 2, 0); // bc 4,CR0[Z] (bne)
+	return ((Bit32u)cache.pos-4);
+}
+
+// calculate relative offset and fill it into the location pointed to by data
+static void gen_fill_branch(DRC_PTR_SIZE_IM data)
+{
+#if C_DEBUG
+	Bits len=(Bit32u)cache.pos-data;
+	if (len<0) len=-len;
+	if (len >= 0x8000) LOG_MSG("Big jump %d",len);
+#endif
+
+	((Bit16u*)data)[1] = ((Bit32u)cache.pos-data) & 0xFFFC;
+}
+
+
+// conditional jump if register is nonzero
+// for isdword==true the 32bit of the register are tested
+// for isdword==false the lowest 8bit of the register are tested
+static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword)
+{
+	if (!dword)
+		IMM_OP(28,reg,HOST_R0,0xFF); // andi. r0,reg,0xFF
+	else
+		IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
+
+	IMM_OP(16, 0x04, 2, 0); // bne
+	return ((Bit32u)cache.pos-4);
+}
+
+// compare 32bit-register against zero and jump if value less/equal than zero
+static Bit32u gen_create_branch_long_leqzero(HostReg reg)
+{
+	IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0
+
+	IMM_OP(16, 0x04, 1, 0); // ble
+	return ((Bit32u)cache.pos-4);
+}
+
+// calculate long relative offset and fill it into the location pointed to by data
+static void gen_fill_branch_long(Bit32u data) {
+	return gen_fill_branch((DRC_PTR_SIZE_IM)data);
+}
+
+static void cache_block_closing(Bit8u* block_start,Bitu block_size)
+{
+#if defined(__GNUC__)
+	Bit8u* start = (Bit8u*)((Bit32u)block_start & -32);
+
+	while (start < block_start + block_size)
+	{
+		asm volatile("dcbst %y0\n\t icbi %y0" :: "Z"(*start));
+		start += 32;
+	}
+	asm volatile("sync\n\t isync");
+#else
+	#error "Don't know how to flush/invalidate CacheBlock with this compiler"
+#endif
+}
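+// The dcbst/icbi walk assumes 32-byte cache lines (true for the 32-bit
+// PowerPC cores this backend targets); dcbst pushes the freshly written
+// code out of the data cache, icbi invalidates stale instruction-cache
+// lines, and the trailing sync/isync make it visible to instruction fetch.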
+
+static void cache_block_before_close(void) {}
+
+static void gen_function(void* func)
+{
+	Bit32s off = (Bit32s)func - (Bit32s)cache.pos;
+
+	// relative branches are limited to +/- 32MB
+	if (off < 0x02000000 && off >= -0x02000000) {
+		cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b func
+		return;
+	}
+
+	gen_mov_dword_to_reg_imm(HOST_R8, (Bit32u)func); // r8 = func
+	EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8
+	IMM_OP(19, 0x14, 0, 528<<1); // bctr
+}
+
+// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it
+static void* epilog_addr;
+static Bit8u *getCF_glue;
+static void gen_run_code(void)
+{
+	// prolog
+	IMM_OP(37, HOST_R1, HOST_R1, -256); // stwu sp,-256(sp)
+	EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1
+	EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0
+
+	IMM_OP(47, HOST_R26, HOST_R1, 128); // stmw r26, 128(sp)
+
+	IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16); // lis FC_SEGS_ADDR, Segs@h
+	IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs); // ori FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l
+
+	IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16); // lis FC_REGS_ADDR, cpu_regs@h
+	IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs); // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l
+
+#if C_FPU
+	IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16); // lis r28, fpu@h
+	IMM_OP(24, HOST_R28, HOST_R28, &fpu); // ori r28, r28, fpu@l
+#endif
+
+	IMM_OP(36, HOST_R0, HOST_R1, 256+4); // stw r0,256+4(sp)
+	IMM_OP(19, 0x14, 0, 528<<1); // bctr
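+	// note: LR is stored at 256+4(sp), the LR save word of the caller's
+	// frame in the SysV stack layout, and r26-r31 (all nonvolatile) hold
+	// the pointer registers loaded above across all generated code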
+
+	// epilog
+	epilog_addr = cache.pos;
+	IMM_OP(32, HOST_R0, HOST_R1, 256+4); // lwz r0,256+4(sp)
+	IMM_OP(46, HOST_R26, HOST_R1, 128); // lmw r26, 128(sp)
+	EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0
+	IMM_OP(14, HOST_R1, HOST_R1, 256); // addi sp, sp, 256
+	IMM_OP(19, 0x14, 0, 16<<1); // blr
+
+	// trampoline to call get_CF()
+	getCF_glue = cache.pos;
+	gen_function((void*)get_CF);
+}
+
+// return from a function
+static void gen_return_function(void)
+{
+	gen_function(epilog_addr);
+}
+
+// called when a call to a function can be replaced by a
+// call to a simpler function
+static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type)
+{
+	Bit32u *op = (Bit32u*)pos;
+	Bit32u *end = op+4;
+
+	switch (flags_type) {
+#if defined(DRC_FLAGS_INVALIDATION_DCODE)
+	// try to avoid function calls but rather directly fill in code
+	case t_ADDb:
+	case t_ADDw:
+	case t_ADDd:
+		*op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_ORb:
+	case t_ORw:
+	case t_ORd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_ADCb:
+	case t_ADCw:
+	case t_ADCd:
+		op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2
+		op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
+		op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF)
+		op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + !!CF
+		return;
+	case t_SBBb:
+	case t_SBBw:
+	case t_SBBd:
+		op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2
+		op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF
+		op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = !CF)
+		op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + !CF
+		return;
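+		// ADC/SBB inline the emulated carry: get_CF() leaves CF in FC_RETOP,
+		// addic/subfic turn it into XER[CA], and addze/addme then fold it
+		// into the r26 result computed before the call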
+	case t_ANDb:
+	case t_ANDw:
+	case t_ANDd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_SUBb:
+	case t_SUBw:
+	case t_SUBd:
+		*op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1
+		break;
+	case t_XORb:
+	case t_XORw:
+	case t_XORd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_CMPb:
+	case t_CMPw:
+	case t_CMPd:
+	case t_TESTb:
+	case t_TESTw:
+	case t_TESTd:
+		break;
+	case t_INCb:
+	case t_INCw:
+	case t_INCd:
+		*op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1
+		break;
+	case t_DECb:
+	case t_DECw:
+	case t_DECd:
+		*op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1
+		break;
+	case t_NEGb:
+	case t_NEGw:
+	case t_NEGd:
+		*op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1
+		break;
+	case t_SHLb:
+	case t_SHLw:
+	case t_SHLd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_SHRb:
+	case t_SHRw:
+	case t_SHRd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2
+		break;
+	case t_SARb:
+		*op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1
+	case t_SARw:
+		if (flags_type == t_SARw)
+			*op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1
+	case t_SARd:
+		*op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2
+		break;
+
+	case t_ROLb:
+		*op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7
+	case t_ROLw:
+		if (flags_type == t_ROLw)
+			*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+	case t_ROLd:
+		*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+		break;
+
+	case t_RORb:
+		*op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23
+	case t_RORw:
+		if (flags_type == t_RORw)
+			*op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15
+	case t_RORd:
+		*op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2)
+		*op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2
+		break;
+
+	case t_DSHLw: // technically not correct for FC_OP3 > 16
+		*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+		*op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3
+		break;
+	case t_DSHLd:
+		op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3
+		op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+		op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3
+		op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+		return;
+	case t_DSHRw: // technically not correct for FC_OP3 > 16
+		*op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15
+		*op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3
+		break;
+	case t_DSHRd:
+		op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3
+		op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3)
+		op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3
+		op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2
+		return;
+#endif
+	default:
+		do_gen_call(fct_ptr, op, true);
+		return;
+	}
+
+	*op = 0x48000000 + 4*(end-op); // b end
+}
+
+// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
+	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, false);
+}
+
+// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
+	gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, true);
+}
+
+// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
+static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
+	gen_add(reg, (Bit8u*)&Segs + index);
+}
+
+// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index)
+{
+	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, false);
+}
+
+// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index)
+{
+	gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, true);
+}
+
+// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function does not use FC_OP1/FC_OP2 as dest_reg as these
+// registers might not be directly byte-accessible on some architectures
+static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index)
+{
+	gen_mov_byte_to_reg_low(dest_reg, (Bit8u*)&cpu_regs + index);
+}
+
+// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
+// the upper 24bit of the destination register can be destroyed
+// this function can use FC_OP1/FC_OP2 as dest_reg which are
+// not directly byte-accessible on some architectures
+static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
+	gen_mov_byte_to_reg_low_canuseword(dest_reg, (Bit8u*)&cpu_regs + index);
+}
+
+// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
+static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index)
+{
+	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, false);
+}
+
+// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index)
+{
+	gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, true);
+}
+
+// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
+static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index)
+{
+	gen_mov_byte_from_reg_low(src_reg, (Bit8u*)&cpu_regs + index);
+}
+
+// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
+static void gen_add_regval32_to_reg(HostReg reg,Bitu index)
+{
+	gen_add(reg, (Bit8u*)&cpu_regs + index);
+}
+
+// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
+static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
+	if (dword)
+		gen_mov_regval32_from_reg(src_reg, index);
+	else
+		gen_mov_regval16_from_reg(src_reg, index);
+}
+
+// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
+// 16bit moves may destroy the upper 16bit of the destination register
+static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
+	if (dword)
+		gen_mov_regval32_to_reg(dest_reg, index);
+	else
+		gen_mov_regval16_to_reg(dest_reg, index);