/*
 *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/cpu_ldst.h"
#include "exec/log.h"
#include "internal.h"
#include "attribs.h"
#include "insn.h"
#include "decode.h"
#include "translate.h"
#include "printinsn.h"

#include "analyze_funcs_generated.c.inc"
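
/*
 * opcode_analyze[] below is an X-macro dispatch table: each OPCODE(X) line
 * in opcodes_def_generated.h.inc expands to an entry [X] = analyze_X,
 * where analyze_X is one of the functions pulled in from
 * analyze_funcs_generated.c.inc (e.g. OPCODE(J2_jump) becomes
 * [J2_jump] = analyze_J2_jump).
 */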

typedef void (*AnalyzeInsn)(DisasContext *ctx);
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
#define OPCODE(X) [X] = analyze_##X
#include "opcodes_def_generated.h.inc"
#undef OPCODE
};

TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
TCGv hex_pred[NUM_PREGS];
TCGv hex_this_PC;
TCGv hex_slot_cancelled;
TCGv hex_branch_taken;
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
TCGv hex_new_pred_value[NUM_PREGS];
TCGv hex_pred_written;
TCGv hex_store_addr[STORES_MAX];
TCGv hex_store_width[STORES_MAX];
TCGv hex_store_val32[STORES_MAX];
TCGv_i64 hex_store_val64[STORES_MAX];
TCGv hex_pkt_has_store_s1;
TCGv hex_dczero_addr;
TCGv hex_llsc_addr;
TCGv hex_llsc_val;
TCGv_i64 hex_llsc_val_i64;
TCGv hex_vstore_addr[VSTORES_MAX];
TCGv hex_vstore_size[VSTORES_MAX];
TCGv hex_vstore_pending[VSTORES_MAX];

static const char * const hexagon_prednames[] = {
    "p0", "p1", "p2", "p3"
};

intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
                             int num, bool alloc_ok)
{
    intptr_t offset;

    /* See if it is already allocated */
    for (int i = 0; i < ctx->future_vregs_idx; i++) {
        if (ctx->future_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, future_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
    }
    ctx->future_vregs_idx += num;
    g_assert(ctx->future_vregs_idx <= VECTOR_TEMPS_MAX);
    return offset;
}
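
/*
 * Note: future_VRegs hold HVX results that commit at the end of the packet
 * (gen_commit_hvx() copies them back into VRegs), while the tmp_VRegs
 * allocated by ctx_tmp_vreg_off() below are scratch values that are never
 * committed back.
 */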

intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
                          int num, bool alloc_ok)
{
    intptr_t offset;

    /* See if it is already allocated */
    for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
        if (ctx->tmp_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, tmp_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
    }
    ctx->tmp_vregs_idx += num;
    g_assert(ctx->tmp_vregs_idx <= VECTOR_TEMPS_MAX);
    return offset;
}

static void gen_exception_raw(int excp)
{
    gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
}

static void gen_exec_counters(DisasContext *ctx)
{
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
                    hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
                    hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
    tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                    hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
}

static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    return translator_use_goto_tb(&ctx->base, dest);
}
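
/*
 * gen_goto_tb() below ends the TB one of two ways: a patchable direct
 * jump (tcg_gen_goto_tb + tcg_gen_exit_tb), which lets TBs be chained
 * without returning to the main loop, or tcg_gen_lookup_and_goto_ptr(),
 * which looks the destination TB up at run time.
 * translator_use_goto_tb() decides whether chaining to dest is safe
 * (e.g. the destination lies on the same guest page).
 */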

static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest,
                        bool move_to_pc)
{
    if (use_goto_tb(ctx, dest)) {
        tcg_gen_goto_tb(idx);
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_exit_tb(ctx->base.tb, idx);
    } else {
        if (move_to_pc) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
        }
        tcg_gen_lookup_and_goto_ptr();
    }
}
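
/*
 * gen_end_tb() finishes the TB according to ctx->branch_cond:
 * TCG_COND_NEVER means no direct branch was recorded for this packet,
 * TCG_COND_ALWAYS means an unconditional one, and any other condition
 * tests hex_branch_taken at run time to choose between branch_dest and
 * the fall-through next_PC.
 */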

static void gen_end_tb(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    gen_exec_counters(ctx);

    if (ctx->branch_cond != TCG_COND_NEVER) {
        if (ctx->branch_cond != TCG_COND_ALWAYS) {
            TCGLabel *skip = gen_new_label();
            tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip);
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
            gen_set_label(skip);
            gen_goto_tb(ctx, 1, ctx->next_PC, false);
        } else {
            gen_goto_tb(ctx, 0, ctx->branch_dest, true);
        }
    } else if (ctx->is_tight_loop &&
               pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
        /*
         * When we're in a tight loop, we defer the endloop0 processing
         * to take advantage of direct block chaining
         */
        TCGLabel *skip = gen_new_label();
        tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, skip);
        tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
        gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
        gen_set_label(skip);
        gen_goto_tb(ctx, 1, ctx->next_PC, false);
    } else {
        tcg_gen_lookup_and_goto_ptr();
    }

    ctx->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_end_tb(DisasContext *ctx, int excp)
{
    gen_exec_counters(ctx);
    tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
    gen_exception_raw(excp);
    ctx->base.is_jmp = DISAS_NORETURN;
}

#define PACKET_BUFFER_LEN 1028
static void print_pkt(Packet *pkt)
{
    GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
    snprint_a_pkt_debug(buf, pkt);
    HEX_DEBUG_LOG("%s", buf->str);
    g_string_free(buf, true);
}
#define HEX_DEBUG_PRINT_PKT(pkt) \
    do { \
        if (HEX_DEBUG) { \
            print_pkt(pkt); \
        } \
    } while (0)

static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
                             uint32_t words[])
{
    bool found_end = false;
    int nwords, max_words;

    memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
    for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
        words[nwords] =
            translator_ldl(env, &ctx->base,
                           ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(words[nwords]);
    }
    if (!found_end) {
        /* Read too many words without finding the end */
        return 0;
    }
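
    /*
     * A worked example of the computation below, assuming 4K target pages:
     * TARGET_PAGE_MASK is ~0xfff, so if pc_next ends in 0xff8, then
     * (pc_next | TARGET_PAGE_MASK) is -8 as a signed value, and
     * -(pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t) leaves max_words == 2
     * words before the page boundary.
     */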
    /* Check for page boundary crossing */
    max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
    if (nwords > max_words) {
        /* We can only cross a page boundary at the beginning of a TB */
        g_assert(ctx->base.num_insns == 1);
    }

    HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
    HEX_DEBUG_LOG("    words = { ");
    for (int i = 0; i < nwords; i++) {
        HEX_DEBUG_LOG("0x%x, ", words[i]);
    }
    HEX_DEBUG_LOG("}\n");

    return nwords;
}

static bool check_for_attrib(Packet *pkt, int attrib)
{
    for (int i = 0; i < pkt->num_insns; i++) {
        if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
            return true;
        }
    }
    return false;
}

static bool need_slot_cancelled(Packet *pkt)
{
    /* We only need slot_cancelled for conditional store instructions */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CONDEXEC) &&
            GET_ATTRIB(opcode, A_SCALAR_STORE)) {
            return true;
        }
    }
    return false;
}

static bool need_pred_written(Packet *pkt)
{
    return check_for_attrib(pkt, A_WRITES_PRED_REG);
}

static bool need_next_PC(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    /* Check for conditional control flow or HW loop end */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
            return true;
        }
        if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
            GET_ATTRIB(opcode, A_HWLOOP1_END)) {
            return true;
        }
    }
    return false;
}

/*
 * The opcode_analyze functions mark most of the writes in a packet.
 * However, there are some implicit writes marked as attributes
 * of the applicable instructions.
 */
static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
{
    uint16_t opcode = ctx->insn->opcode;
    if (GET_ATTRIB(opcode, attrib)) {
        /*
         * USR is used to set overflow and FP exceptions,
         * so treat it as conditional
         */
        bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
                             rnum == HEX_REG_USR;

        /* LC0/LC1 is conditionally written by endloop instructions */
        if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
            (opcode == J2_endloop0 ||
             opcode == J2_endloop1 ||
             opcode == J2_endloop01)) {
            is_predicated = true;
        }

        ctx_log_reg_write(ctx, rnum, is_predicated);
    }
}

static void mark_implicit_reg_writes(DisasContext *ctx)
{
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP,  HEX_REG_FP);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SP,  HEX_REG_SP);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LR,  HEX_REG_LR);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC0, HEX_REG_LC0);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
    mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR);
    mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR);
}

static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
        ctx_log_pred_write(ctx, pnum);
    }
}

static void mark_implicit_pred_writes(DisasContext *ctx)
{
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P0, 0);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P1, 1);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P2, 2);
    mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
}

static void analyze_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    ctx->need_pkt_has_store_s1 = false;
    for (int i = 0; i < pkt->num_insns; i++) {
        Insn *insn = &pkt->insn[i];
        ctx->insn = insn;
        if (opcode_analyze[insn->opcode]) {
            opcode_analyze[insn->opcode](ctx);
        }
        mark_implicit_reg_writes(ctx);
        mark_implicit_pred_writes(ctx);
    }
}

static void gen_start_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes;
    int i;

    /* Clear out the disassembly context */
    ctx->next_PC = next_PC;
    ctx->reg_log_idx = 0;
    bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
    ctx->preg_log_idx = 0;
    bitmap_zero(ctx->pregs_written, NUM_PREGS);
    ctx->future_vregs_idx = 0;
    ctx->tmp_vregs_idx = 0;
    ctx->vreg_log_idx = 0;
    bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated, NUM_VREGS);
    bitmap_zero(ctx->vregs_select, NUM_VREGS);
    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
    ctx->qreg_log_idx = 0;
    for (i = 0; i < STORES_MAX; i++) {
        ctx->store_width[i] = 0;
    }
    ctx->s1_store_processed = false;
    ctx->pre_commit = true;

    analyze_packet(ctx);

    if (ctx->need_pkt_has_store_s1) {
        tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
    }

    /*
     * pregs_written is used both in the analyze phase as well as the code
     * gen phase, so clear it again.
     */
    bitmap_zero(ctx->pregs_written, NUM_PREGS);

    if (HEX_DEBUG) {
        /* Handy place to set a breakpoint before the packet executes */
        gen_helper_debug_start_packet(cpu_env);
        tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
    }

    /* Initialize the runtime state for packet semantics */
    if (need_slot_cancelled(pkt)) {
        tcg_gen_movi_tl(hex_slot_cancelled, 0);
    }
    if (pkt->pkt_has_cof) {
        if (pkt->pkt_has_multi_cof) {
            tcg_gen_movi_tl(hex_branch_taken, 0);
        }
        if (need_next_PC(ctx)) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC);
        }
    }
    if (need_pred_written(pkt)) {
        tcg_gen_movi_tl(hex_pred_written, 0);
    }

    /* Preload the predicated registers into hex_new_value[i] */
    if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
        int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
        while (i < TOTAL_PER_THREAD_REGS) {
            tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
            i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS,
                              i + 1);
        }
    }

    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
        int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_future_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS, i + 1);
        }
    }
    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
        int i = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
        while (i < NUM_VREGS) {
            const intptr_t VdV_off =
                ctx_tmp_vreg_off(ctx, i, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[i]);
            tcg_gen_gvec_mov(MO_64, VdV_off,
                             src_off,
                             sizeof(MMVector),
                             sizeof(MMVector));
            i = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS, i + 1);
        }
    }
}

bool is_gather_store_insn(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    Insn *insn = ctx->insn;
    if (GET_ATTRIB(insn->opcode, A_CVI_NEW) &&
        insn->new_value_producer_slot == 1) {
        /* Look for gather instruction */
        for (int i = 0; i < pkt->num_insns; i++) {
            Insn *in = &pkt->insn[i];
            if (GET_ATTRIB(in->opcode, A_CVI_GATHER) && in->slot == 1) {
                return true;
            }
        }
    }
    return false;
}

static void mark_store_width(DisasContext *ctx)
{
    uint16_t opcode = ctx->insn->opcode;
    uint32_t slot = ctx->insn->slot;
    uint8_t width = 0;

    if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
        if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) {
            width |= 1;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_2B)) {
            width |= 2;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_4B)) {
            width |= 4;
        }
        if (GET_ATTRIB(opcode, A_MEMSIZE_8B)) {
            width |= 8;
        }
        /* The is_power_of_2 check catches multiple MEMSIZE attributes */
        tcg_debug_assert(is_power_of_2(width));
        ctx->store_width[slot] = width;
    }
}

static void gen_insn(DisasContext *ctx)
{
    if (ctx->insn->generate) {
        ctx->insn->generate(ctx);
        mark_store_width(ctx);
    } else {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
    }
}

/*
 * Helpers for generating the packet commit
 */
static void gen_reg_writes(DisasContext *ctx)
{
    int i;

    for (i = 0; i < ctx->reg_log_idx; i++) {
        int reg_num = ctx->reg_log[i];

        tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]);

        /*
         * ctx->is_tight_loop is set when SA0 points to the beginning of
         * the TB.  If we write to SA0, we have to turn off tight loop
         * handling.
         */
        if (reg_num == HEX_REG_SA0) {
            ctx->is_tight_loop = false;
        }
    }
}

static void gen_pred_writes(DisasContext *ctx)
{
    int i;

    /* Early exit if the log is empty */
    if (!ctx->preg_log_idx) {
        return;
    }

    /*
     * Only endloop instructions will conditionally
     * write a predicate.  If there are no endloop
     * instructions, we can use the non-conditional
     * write of the predicates.
     */
    if (ctx->pkt->pkt_has_endloop) {
        TCGv zero = tcg_constant_tl(0);
        TCGv pred_written = tcg_temp_new();
        for (i = 0; i < ctx->preg_log_idx; i++) {
            int pred_num = ctx->preg_log[i];

            tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num);
            tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num],
                               pred_written, zero,
                               hex_new_pred_value[pred_num],
                               hex_pred[pred_num]);
        }
    } else {
        for (i = 0; i < ctx->preg_log_idx; i++) {
            int pred_num = ctx->preg_log[i];
            tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
            if (HEX_DEBUG) {
                /* Do this so HELPER(debug_commit_end) will know */
                tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
                               1 << pred_num);
            }
        }
    }
}
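
/*
 * Note on the movcond above: tcg_gen_movcond_tl(TCG_COND_NE, dst, c1, c2,
 * t, f) computes dst = (c1 != c2) ? t : f, so each predicate only takes
 * its new value when its bit is set in hex_pred_written.
 */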

static void gen_check_store_width(DisasContext *ctx, int slot_num)
{
    if (HEX_DEBUG) {
        TCGv slot = tcg_constant_tl(slot_num);
        TCGv check = tcg_constant_tl(ctx->store_width[slot_num]);
        gen_helper_debug_check_store_width(cpu_env, slot, check);
    }
}

static bool slot_is_predicated(Packet *pkt, int slot_num)
{
    for (int i = 0; i < pkt->num_insns; i++) {
        if (pkt->insn[i].slot == slot_num) {
            return GET_ATTRIB(pkt->insn[i].opcode, A_CONDEXEC);
        }
    }
    /* If we get to here, we didn't find an instruction in the requested slot */
    g_assert_not_reached();
}

void process_store(DisasContext *ctx, int slot_num)
{
    bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
    TCGLabel *label_end = NULL;

    /*
     * We may have already processed this store
     * See CHECK_NOSHUF in macros.h
     */
    if (slot_num == 1 && ctx->s1_store_processed) {
        return;
    }
    ctx->s1_store_processed = true;

    if (is_predicated) {
        TCGv cancelled = tcg_temp_new();
        label_end = gen_new_label();

        /* Don't do anything if the slot was cancelled */
        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
    }
    {
        TCGv address = tcg_temp_new();
        tcg_gen_mov_tl(address, hex_store_addr[slot_num]);

        /*
         * If we know the width from the DisasContext, we can
         * generate much cleaner code.
         * Unfortunately, not all instructions execute the fSTORE
         * macro during code generation.  Anything that uses the
         * generic helper will have this problem.  Instructions
         * that use fWRAP to generate proper TCG code will be OK.
         */
        switch (ctx->store_width[slot_num]) {
        case 1:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st8(hex_store_val32[slot_num],
                             hex_store_addr[slot_num],
                             ctx->mem_idx);
            break;
        case 2:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st16(hex_store_val32[slot_num],
                              hex_store_addr[slot_num],
                              ctx->mem_idx);
            break;
        case 4:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st32(hex_store_val32[slot_num],
                              hex_store_addr[slot_num],
                              ctx->mem_idx);
            break;
        case 8:
            gen_check_store_width(ctx, slot_num);
            tcg_gen_qemu_st64(hex_store_val64[slot_num],
                              hex_store_addr[slot_num],
                              ctx->mem_idx);
            break;
        default:
            {
                /*
                 * If we get to here, we don't know the width at
                 * TCG generation time, we'll use a helper to
                 * avoid branching based on the width at runtime.
                 */
                TCGv slot = tcg_constant_tl(slot_num);
                gen_helper_commit_store(cpu_env, slot);
            }
        }
    }
    if (is_predicated) {
        gen_set_label(label_end);
    }
}

static void process_store_log(DisasContext *ctx)
{
    /*
     *  When a packet has two stores, the hardware processes
     *  slot 1 and then slot 0.  This will be important when
     *  the memory accesses overlap.
     */
    Packet *pkt = ctx->pkt;
    if (pkt->pkt_has_store_s1) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 1);
    }
    if (pkt->pkt_has_store_s0) {
        g_assert(!pkt->pkt_has_dczeroa);
        process_store(ctx, 0);
    }
}

/* Zero out a 32-byte cache line */
static void process_dczeroa(DisasContext *ctx)
{
    if (ctx->pkt->pkt_has_dczeroa) {
        /* Store 32 bytes of zero starting at (addr & ~0x1f) */
        TCGv addr = tcg_temp_new();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
        tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
        tcg_gen_addi_tl(addr, addr, 8);
        tcg_gen_qemu_st64(zero, addr, ctx->mem_idx);
    }
}

static bool pkt_has_hvx_store(Packet *pkt)
{
    int i;
    for (i = 0; i < pkt->num_insns; i++) {
        int opcode = pkt->insn[i].opcode;
        if (GET_ATTRIB(opcode, A_CVI) && GET_ATTRIB(opcode, A_STORE)) {
            return true;
        }
    }
    return false;
}

static void gen_commit_hvx(DisasContext *ctx)
{
    int i;

    /*
     *    for (i = 0; i < ctx->vreg_log_idx; i++) {
     *        int rnum = ctx->vreg_log[i];
     *        env->VRegs[rnum] = env->future_VRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->vreg_log_idx; i++) {
        int rnum = ctx->vreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, VRegs[rnum]);
        intptr_t srcoff = ctx_future_vreg_off(ctx, rnum, 1, false);
        size_t size = sizeof(MMVector);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    /*
     *    for (i = 0; i < ctx->qreg_log_idx; i++) {
     *        int rnum = ctx->qreg_log[i];
     *        env->QRegs[rnum] = env->future_QRegs[rnum];
     *    }
     */
    for (i = 0; i < ctx->qreg_log_idx; i++) {
        int rnum = ctx->qreg_log[i];
        intptr_t dstoff = offsetof(CPUHexagonState, QRegs[rnum]);
        intptr_t srcoff = offsetof(CPUHexagonState, future_QRegs[rnum]);
        size_t size = sizeof(MMQReg);

        tcg_gen_gvec_mov(MO_64, dstoff, srcoff, size, size);
    }

    if (pkt_has_hvx_store(ctx->pkt)) {
        gen_helper_commit_hvx_stores(cpu_env);
    }
}

static void update_exec_counters(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    int num_insns = pkt->num_insns;
    int num_real_insns = 0;
    int num_hvx_insns = 0;

    for (int i = 0; i < num_insns; i++) {
        if (!pkt->insn[i].is_endloop &&
            !pkt->insn[i].part1 &&
            !GET_ATTRIB(pkt->insn[i].opcode, A_IT_NOP)) {
            num_real_insns++;
        }
        if (GET_ATTRIB(pkt->insn[i].opcode, A_CVI)) {
            num_hvx_insns++;
        }
    }

    ctx->num_packets++;
    ctx->num_insns += num_real_insns;
    ctx->num_hvx_insns += num_hvx_insns;
}
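
/*
 * The counts accumulated above are folded into the guest-visible
 * HEX_REG_QEMU_PKT_CNT/INSN_CNT/HVX_CNT registers by gen_exec_counters()
 * when the TB ends.
 */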

static void gen_commit_packet(DisasContext *ctx)
{
    /*
     * If there is more than one store in a packet, make sure they are all OK
     * before proceeding with the rest of the packet commit.
     *
     * dczeroa has to be the only store operation in the packet, so we go
     * ahead and process that first.
     *
     * When there is an HVX store, there can also be a scalar store in either
     * slot 0 or slot 1, so we create a mask for the helper to indicate what
     * work needs to be done.
     *
     * When there are two scalar stores, we probe the one in slot 0.
     *
     * Note that we don't call the probe helper for packets with only one
     * store.  Therefore, we call process_store_log before anything else
     * involved in committing the packet.
     */
    Packet *pkt = ctx->pkt;
    bool has_store_s0 = pkt->pkt_has_store_s0;
    bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed);
    bool has_hvx_store = pkt_has_hvx_store(pkt);
    if (pkt->pkt_has_dczeroa) {
        /*
         * The dczeroa will be the store in slot 0, check that we don't have
         * a store in slot 1 or an HVX store.
         */
        g_assert(!has_store_s1 && !has_hvx_store);
        process_dczeroa(ctx);
    } else if (has_hvx_store) {
        if (!has_store_s0 && !has_store_s1) {
            TCGv mem_idx = tcg_constant_tl(ctx->mem_idx);
            gen_helper_probe_hvx_stores(cpu_env, mem_idx);
        } else {
            int mask = 0;

            if (has_store_s0) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0, 1);
            }
            if (has_store_s1) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1, 1);
            }
            if (has_hvx_store) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_HVX_STORES, 1);
            }
            if (has_store_s0 && slot_is_predicated(pkt, 0)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S0_IS_PRED, 1);
            }
            if (has_store_s1 && slot_is_predicated(pkt, 1)) {
                mask =
                    FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES,
                               S1_IS_PRED, 1);
            }
            mask = FIELD_DP32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
                              ctx->mem_idx);
            gen_helper_probe_pkt_scalar_hvx_stores(cpu_env,
                                                   tcg_constant_tl(mask));
        }
    } else if (has_store_s0 && has_store_s1) {
        /*
         * process_store_log will execute the slot 1 store first,
         * so we only have to probe the store in slot 0
         */
        int args = 0;
        args =
            FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, ctx->mem_idx);
        if (slot_is_predicated(pkt, 0)) {
            args =
                FIELD_DP32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 1);
        }
        TCGv args_tcgv = tcg_constant_tl(args);
        gen_helper_probe_pkt_scalar_store_s0(cpu_env, args_tcgv);
    }

    process_store_log(ctx);

    gen_reg_writes(ctx);
    gen_pred_writes(ctx);
    if (pkt->pkt_has_hvx) {
        gen_commit_hvx(ctx);
    }
    update_exec_counters(ctx);
    if (HEX_DEBUG) {
        TCGv has_st0 =
            tcg_constant_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
        TCGv has_st1 =
            tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa);

        /* Handy place to set a breakpoint at the end of execution */
        gen_helper_debug_commit_end(cpu_env, has_st0, has_st1);
    }

    if (pkt->vhist_insn != NULL) {
        ctx->pre_commit = false;
        ctx->insn = pkt->vhist_insn;
        pkt->vhist_insn->generate(ctx);
    }

    if (pkt->pkt_has_cof) {
        gen_end_tb(ctx);
    }
}

static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
{
    uint32_t words[PACKET_WORDS_MAX];
    int nwords;
    Packet pkt;
    int i;

    nwords = read_packet_words(env, ctx, words);
    if (!nwords) {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
        return;
    }

    if (decode_packet(nwords, words, &pkt, false) > 0) {
        pkt.pc = ctx->base.pc_next;
        HEX_DEBUG_PRINT_PKT(&pkt);
        ctx->pkt = &pkt;
        gen_start_packet(ctx);
        for (i = 0; i < pkt.num_insns; i++) {
            ctx->insn = &pkt.insn[i];
            gen_insn(ctx);
        }
        gen_commit_packet(ctx);
        ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
    } else {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
    }
}

static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cs)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    uint32_t hex_flags = dcbase->tb->flags;

    ctx->mem_idx = MMU_USER_IDX;
    ctx->num_packets = 0;
    ctx->num_insns = 0;
    ctx->num_hvx_insns = 0;
    ctx->branch_cond = TCG_COND_NEVER;
    ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
}

static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(ctx->base.pc_next);
}

static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
{
    target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
    bool found_end = false;
    int nwords;

    for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
        uint32_t word = cpu_ldl_code(env,
            ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(word);
    }
    uint32_t next_ptr = ctx->base.pc_next + nwords * sizeof(uint32_t);
    return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
}
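
/*
 * Hexagon packets are variable length (up to PACKET_WORDS_MAX words), so
 * once pc_next gets within one maximum-sized packet of the end of the
 * page, the loop below uses pkt_crosses_page() to decide whether the next
 * packet would spill onto the following page, in which case the TB ends.
 */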

static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPUHexagonState *env = cpu->env_ptr;

    decode_and_translate_packet(env, ctx);

    if (ctx->base.is_jmp == DISAS_NEXT) {
        target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
        target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);

        if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE ||
            (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE - bytes_max &&
             pkt_crosses_page(env, ctx))) {
            ctx->base.is_jmp = DISAS_TOO_MANY;
        }

        /*
         * The CPU log is used to compare against LLDB single stepping,
         * so end the TB after every packet.
         */
        HexagonCPU *hex_cpu = env_archcpu(env);
        if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            ctx->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}

static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    switch (ctx->base.is_jmp) {
    case DISAS_TOO_MANY:
        gen_exec_counters(ctx);
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
        tcg_gen_exit_tb(NULL, 0);
        break;
    case DISAS_NORETURN:
        break;
    default:
        g_assert_not_reached();
    }
}

static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
    target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
}

static const TranslatorOps hexagon_tr_ops = {
    .init_disas_context = hexagon_tr_init_disas_context,
    .tb_start           = hexagon_tr_tb_start,
    .insn_start         = hexagon_tr_insn_start,
    .translate_insn     = hexagon_tr_translate_packet,
    .tb_stop            = hexagon_tr_tb_stop,
    .disas_log          = hexagon_tr_disas_log,
};
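
/*
 * Note that the .translate_insn hook translates an entire Hexagon packet,
 * not a single instruction: the packet is the architectural unit of
 * execution, so each iteration of the translator loop consumes one packet.
 */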

void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
                           target_ulong pc, void *host_pc)
{
    DisasContext ctx;

    translator_loop(cs, tb, max_insns, pc, host_pc,
                    &hexagon_tr_ops, &ctx.base);
}

#define NAME_LEN               64
static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
static char new_pred_value_names[NUM_PREGS][NAME_LEN];
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];

void hexagon_translate_init(void)
{
    int i;

    opcode_init();

    for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
        hex_gpr[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, gpr[i]),
            hexagon_regnames[i]);

        snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
        hex_new_value[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, new_value[i]),
            new_value_names[i]);

        if (HEX_DEBUG) {
            snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
                     hexagon_regnames[i]);
            hex_reg_written[i] = tcg_global_mem_new(cpu_env,
                offsetof(CPUHexagonState, reg_written[i]),
                reg_written_names[i]);
        }
    }
    for (i = 0; i < NUM_PREGS; i++) {
        hex_pred[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, pred[i]),
            hexagon_prednames[i]);

        snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
                 hexagon_prednames[i]);
        hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, new_pred_value[i]),
            new_pred_value_names[i]);
    }
    hex_pred_written = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, pred_written), "pred_written");
    hex_this_PC = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, this_PC), "this_PC");
    hex_slot_cancelled = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
    hex_branch_taken = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, branch_taken), "branch_taken");
    hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
    hex_dczero_addr = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
    hex_llsc_addr = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
    hex_llsc_val = tcg_global_mem_new(cpu_env,
        offsetof(CPUHexagonState, llsc_val), "llsc_val");
    hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
    for (i = 0; i < STORES_MAX; i++) {
        snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
        hex_store_addr[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, mem_log_stores[i].va),
            store_addr_names[i]);

        snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
        hex_store_width[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, mem_log_stores[i].width),
            store_width_names[i]);

        snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
        hex_store_val32[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, mem_log_stores[i].data32),
            store_val32_names[i]);

        snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
        hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
            offsetof(CPUHexagonState, mem_log_stores[i].data64),
            store_val64_names[i]);
    }
    for (int i = 0; i < VSTORES_MAX; i++) {
        snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
        hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, vstore[i].va),
            vstore_addr_names[i]);

        snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
        hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, vstore[i].size),
            vstore_size_names[i]);

        snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
        hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
            offsetof(CPUHexagonState, vstore_pending[i]),
            vstore_pending_names[i]);
    }
}