1 ------------------------------------------------------------------------------
5 ---- http://www.opencores.org/ ----
8 ---- ZPU is a 32 bits small stack cpu. This is the medium size version. ----
9 ---- Supports external memories. ----
15 ---- - Øyvind Harboe, oyvind.harboe zylin.com ----
16 ---- - Salvador E. Tropea, salvador inti.gob.ar ----
18 ------------------------------------------------------------------------------
20 ---- Copyright (c) 2008 Øyvind Harboe <oyvind.harboe zylin.com> ----
21 ---- Copyright (c) 2008 Salvador E. Tropea <salvador inti.gob.ar> ----
22 ---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial ----
24 ---- Distributed under the BSD license ----
26 ------------------------------------------------------------------------------
28 ---- Design unit: ZPUMediumCore(Behave) (Entity and architecture) ----
29 ---- File name: zpu_medium.vhdl ----
31 ---- Limitations: None known ----
32 ---- Errors: None known ----
33 ---- Library: zpu ----
34 ---- Dependencies: IEEE.std_logic_1164 ----
35 ---- IEEE.numeric_std ----
37 ---- Target FPGA: Spartan 3 (XC3S400-4-FT256) ----
38 ---- Language: VHDL ----
39 ---- Wishbone: No ----
40 ---- Synthesis tools: Xilinx Release 9.2.03i - xst J.39 ----
41 ---- Simulation tools: GHDL [Sokcho edition] (0.2x) ----
42 ---- Text editor: SETEdit 0.5.x ----
44 ------------------------------------------------------------------------------
46 -- write_en_o - set to '1' for a single cycle to send off a write request.
47 -- data_o is valid only while write_en_o='1'.
48 -- read_en_o - set to '1' for a single cycle to send off a read request.
49 -- mem_busy_i - It is illegal to send off a read/write request when
51 -- Set to '0' when data_i is valid after a read request.
52 -- If it goes to '1'(busy), it is on the cycle after read/
54 -- addr_o - address for read/write request
55 -- data_i - read data. Valid only on the cycle after mem_busy_i='0'
56 -- after read_en_o='1' for a single cycle.
57 -- data_o - data to write
58 -- break_o - set to '1' when CPU hits break instruction
61 use IEEE.std_logic_1164.all;
62 use IEEE.numeric_std.all;
67 entity ZPUMediumCore is
69 WORD_SIZE : integer:=32; -- 16/32 (2**wordPower)
70 ADDR_W : integer:=16; -- Total address space width (incl. I/O)
71 MEM_W : integer:=15; -- Memory (prog+data+stack) width
72 D_CARE_VAL : std_logic:='X'; -- Value used to fill the unsused bits
73 MULT_PIPE : boolean:=false; -- Pipeline multiplication
74 BINOP_PIPE : integer range 0 to 2:=0; -- Pipeline binary operations (-, =, < and <=)
75 ENA_LEVEL0 : boolean:=true; -- eq, loadb, neqbranch and pushspadd
76 ENA_LEVEL1 : boolean:=true; -- lessthan, ulessthan, mult, storeb, callpcrel and sub
77 ENA_LEVEL2 : boolean:=false; -- lessthanorequal, ulessthanorequal, call and poppcrel
78 ENA_LSHR : boolean:=true; -- lshiftright
79 ENA_IDLE : boolean:=false; -- Enable the enable_i input
80 FAST_FETCH : boolean:=true); -- Merge the st_fetch with the st_execute states
82 clk_i : in std_logic; -- CPU Clock
83 reset_i : in std_logic; -- Sync Reset
84 enable_i : in std_logic; -- Hold the CPU (after reset)
85 break_o : out std_logic; -- Break instruction executed
86 dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log)
88 mem_busy_i : in std_logic; -- Memory is busy
89 data_i : in unsigned(WORD_SIZE-1 downto 0); -- Data from mem
90 data_o : out unsigned(WORD_SIZE-1 downto 0); -- Data to mem
91 addr_o : out unsigned(ADDR_W-1 downto 0); -- Memory address
92 write_en_o : out std_logic; -- Memory write enable
93 read_en_o : out std_logic); -- Memory read enable
94 end entity ZPUMediumCore;
96 architecture Behave of ZPUMediumCore is
97 constant BYTE_BITS : integer:=WORD_SIZE/16; -- # of bits in a word that addresses bytes
98 constant WORD_BYTES : integer:=WORD_SIZE/OPCODE_W;
99 constant MAX_ADDR_BIT : integer:=ADDR_W-2;
100 -- Stack Pointer initial value: BRAM size-8
101 constant SP_START_1 : unsigned(ADDR_W-1 downto 0):=to_unsigned((2**MEM_W)-8,ADDR_W);
102 constant SP_START : unsigned(ADDR_W-1 downto BYTE_BITS):=
103 SP_START_1(ADDR_W-1 downto BYTE_BITS);
105 -- Update [SP+1]. We hold it in b_r, this writes the value to memory.
106 procedure FlushB(signal we : out std_logic;
107 signal addr : out unsigned(ADDR_W-1 downto BYTE_BITS);
108 signal inc_sp : in unsigned(ADDR_W-1 downto BYTE_BITS);
109 signal data : out unsigned(WORD_SIZE-1 downto 0);
110 signal b : in unsigned(WORD_SIZE-1 downto 0)) is
115 end procedure FlushB;
117 -- Do a simple stack push, it is performed in the internal cache registers,
118 -- not in the real memory.
119 procedure Push(signal sp : inout unsigned(ADDR_W-1 downto BYTE_BITS);
120 signal a : in unsigned(WORD_SIZE-1 downto 0);
121 signal b : out unsigned(WORD_SIZE-1 downto 0)) is
123 b <= a; -- Update cache [SP+1]=[SP]
127 -- Do a simple stack pop, it is performed in the internal cache registers,
128 -- not in the real memory.
129 procedure Pop(signal sp : inout unsigned(ADDR_W-1 downto BYTE_BITS);
130 signal a : out unsigned(WORD_SIZE-1 downto 0);
131 signal b : in unsigned(WORD_SIZE-1 downto 0)) is
133 a <= b; -- Update cache [SP]=[SP+1]
137 -- Expand a PC value to WORD_SIZE
138 function ExpandPC(v : unsigned(ADDR_W-1 downto 0)) return unsigned is
139 variable nv : unsigned(WORD_SIZE-1 downto 0);
142 nv(ADDR_W-1 downto 0):=v;
144 end function ExpandPC;
147 signal pc_r : unsigned(ADDR_W-1 downto 0):=(others => '0');
149 signal sp_r : unsigned(ADDR_W-1 downto BYTE_BITS):=SP_START;
150 -- SP+1, SP+2 and SP-1 are very used, these are shortcuts
151 signal inc_sp : unsigned(ADDR_W-1 downto BYTE_BITS);
152 signal inc_inc_sp : unsigned(ADDR_W-1 downto BYTE_BITS);
153 -- a_r is a cache for the top of the stack [SP]
154 -- Note: as this is a stack CPU this is a very important register.
155 signal a_r : unsigned(WORD_SIZE-1 downto 0);
156 -- b_r is a cache for the next value in the stack [SP+1]
157 signal b_r : unsigned(WORD_SIZE-1 downto 0);
158 signal bin_op_res1_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
159 signal bin_op_res2_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
160 signal mult_res1_r : unsigned(WORD_SIZE-1 downto 0);
161 signal mult_res2_r : unsigned(WORD_SIZE-1 downto 0);
162 signal mult_res3_r : unsigned(WORD_SIZE-1 downto 0);
163 signal mult_a_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
164 signal mult_b_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
165 signal idim_r : std_logic;
166 signal write_en_r : std_logic;
167 signal read_en_r : std_logic;
168 signal addr_r : unsigned(ADDR_W-1 downto BYTE_BITS):=(others => '0');
169 signal fetched_w_r : unsigned(WORD_SIZE-1 downto 0);
171 type state_t is(st_load2, st_popped, st_load_sp2, st_load_sp3, st_add_sp2,
172 st_fetch, st_execute, st_decode, st_decode2, st_resync,
173 st_store_sp2, st_resync2, st_resync3, st_loadb2, st_storeb2,
174 st_mult2, st_mult3, st_mult5, st_mult4, st_binary_op_res2,
175 st_binary_op_res, st_idle);
176 signal state : state_t:=st_resync;
178 -- Go to st_fetch state or just do its work
179 procedure DoFetch(constant FAST : boolean;
180 signal state : out state_t;
181 signal addr : out unsigned(ADDR_W-1 downto BYTE_BITS);
182 signal pc : in unsigned(ADDR_W-1 downto 0);
183 signal re : out std_logic;
184 signal busy : in std_logic) is
187 -- Equivalent to st_fetch
189 addr <= pc(ADDR_W-1 downto BYTE_BITS);
196 end procedure DoFetch;
198 -- Perform a "binary operation" (2 operands)
199 procedure DoBinOp(result : in unsigned(WORD_SIZE-1 downto 0);
200 signal state : out state_t;
201 signal sp : inout unsigned(ADDR_W-1 downto BYTE_BITS);
202 signal addr : out unsigned(ADDR_W-1 downto BYTE_BITS);
203 signal re : out std_logic;
204 signal dest : out unsigned(WORD_SIZE-1 downto 0);
205 signal dest_p : out unsigned(WORD_SIZE-1 downto 0);
206 constant DEPTH : natural) is
209 -- 2 clocks: st_binary_op_res+st_binary_op_res2
210 state <= st_binary_op_res;
213 -- 1 clock: st_binary_op_res2
214 state <= st_binary_op_res2;
223 end procedure DoBinOp;
225 -- Perform a boolean "binary operation" (2 operands)
226 procedure DoBinOpBool(result : in boolean;
227 signal state : out state_t;
228 signal sp : inout unsigned(ADDR_W-1 downto BYTE_BITS);
229 signal addr : out unsigned(ADDR_W-1 downto BYTE_BITS);
230 signal re : out std_logic;
231 signal dest : out unsigned(WORD_SIZE-1 downto 0);
232 signal dest_p : out unsigned(WORD_SIZE-1 downto 0);
233 constant DEPTH : natural) is
234 variable res : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
239 DoBinOp(res,state,sp,addr,re,dest,dest_p,DEPTH);
240 end procedure DoBinOpBool;
242 type insn_t is (dec_add_top, dec_dup, dec_dup_stk_b, dec_pop, dec_add,
243 dec_or, dec_and, dec_store, dec_add_sp, dec_shift, dec_nop,
244 dec_im, dec_load_sp, dec_store_sp, dec_emulate, dec_load,
245 dec_push_sp, dec_pop_pc, dec_pop_pc_rel, dec_not, dec_flip,
246 dec_pop_sp, dec_neq_branch, dec_eq, dec_loadb, dec_mult,
247 dec_less_than, dec_less_than_or_equal, dec_lshr,
248 dec_u_less_than_or_equal, dec_u_less_than, dec_push_sp_add,
249 dec_call, dec_call_pc_rel, dec_sub, dec_break, dec_storeb,
250 dec_insn_fetch, dec_pop_down);
251 signal insn : insn_t;
252 type insn_array_t is array(0 to WORD_BYTES-1) of insn_t;
253 signal insns : insn_array_t;
254 type opcode_array_t is array(0 to WORD_BYTES-1) of unsigned(OPCODE_W-1 downto 0);
255 signal opcode_r : opcode_array_t;
257 -- the memory subsystem will tell us one cycle later whether or
259 write_en_o <= write_en_r;
260 read_en_o <= read_en_r;
261 addr_o(ADDR_W-1 downto BYTE_BITS) <= addr_r;
262 addr_o(BYTE_BITS-1 downto 0) <= (others => '0');
266 inc_inc_sp <= sp_r+2;
270 variable topcode : unsigned(OPCODE_W-1 downto 0);
271 variable ex_opcode : unsigned(OPCODE_W-1 downto 0);
272 variable sp_offset : unsigned(4 downto 0);
273 variable tsp_offset : unsigned(4 downto 0);
274 variable next_pc : unsigned(ADDR_W-1 downto 0);
275 variable tdecoded : insn_t;
276 variable tinsns : insn_array_t;
277 variable mult_res : unsigned(WORD_SIZE*2-1 downto 0);
278 variable ipc_low : integer range 0 to 3; -- Address inside a word (pc_r)
279 variable inpc_low : integer range 0 to 3; -- Address inside a word (next_pc)
280 variable h_bit : integer;
281 variable l_bit : integer;
282 variable not_lshr : std_logic:='1';
284 if rising_edge(clk_i) then
293 pc_r <= (others => '0');
297 mult_a_r <= (others => '0');
298 mult_b_r <= (others => '0');
300 -- Reseting add_r here makes XST fail to use BRAMs ?!
303 -- We must multiply unconditionally to get pipelined multiplication
304 mult_res:=mult_a_r*mult_b_r;
305 mult_res1_r <= mult_res(WORD_SIZE-1 downto 0);
306 mult_res2_r <= mult_res1_r;
307 mult_res3_r <= mult_res2_r;
308 mult_a_r <= (others => D_CARE_VAL);
309 mult_b_r <= (others => D_CARE_VAL);
313 bin_op_res2_r <= bin_op_res1_r; -- pipeline a bit.
318 -- Allow synthesis tools to load bogus values when we don't
319 -- care about the address and output data.
320 addr_r <= (others => D_CARE_VAL);
321 data_o <= (others => D_CARE_VAL);
323 if (write_en_r='1') and (read_en_r='1') then
324 report "read/write collision" severity failure;
327 ipc_low:=to_integer(pc_r(BYTE_BITS-1 downto 0));
328 sp_offset(4):=not opcode_r(ipc_low)(4);
329 sp_offset(3 downto 0):=opcode_r(ipc_low)(3 downto 0);
332 -- Prepare trace snapshot
333 dbg_o.opcode <= opcode_r(ipc_low);
334 dbg_o.pc <= resize(pc_r,32);
335 dbg_o.stk_a <= resize(a_r,32);
336 dbg_o.stk_b <= resize(b_r,32);
338 dbg_o.sp <= (others => '0');
339 dbg_o.sp(ADDR_W-1 downto BYTE_BITS) <= sp_r;
346 -- Initial state of ZPU, fetch top of stack (A/B) + first instruction
348 if mem_busy_i='0' then
354 if mem_busy_i='0' then
361 if mem_busy_i='0' then
363 addr_r <= pc_r(ADDR_W-1 downto BYTE_BITS);
368 if mem_busy_i='0' then
369 -- Here we latch the fetched word to give one full clock
370 -- cycle to the instruction decoder. This could be removed
371 -- if using BRAMs and the decoder delay isn't important.
372 fetched_w_r <= data_i;
376 -- decode 4 instructions in parallel
377 for i in 0 to WORD_BYTES-1 loop
378 topcode:=fetched_w_r((WORD_BYTES-1-i+1)*8-1 downto (WORD_BYTES-1-i)*8);
380 tsp_offset(4):=not topcode(4);
381 tsp_offset(3 downto 0):=topcode(3 downto 0);
383 opcode_r(i) <= topcode;
384 if topcode(7 downto 7)=OPCODE_IM then
386 elsif topcode(7 downto 5)=OPCODE_STORESP then
388 -- Special case, we can avoid a write
390 elsif tsp_offset=1 then
391 -- Special case, collision
392 tdecoded:=dec_pop_down;
394 tdecoded:=dec_store_sp;
396 elsif topcode(7 downto 5)=OPCODE_LOADSP then
399 elsif tsp_offset=1 then
400 tdecoded:=dec_dup_stk_b;
402 tdecoded:=dec_load_sp;
404 elsif topcode(7 downto 5)=OPCODE_EMULATE then
405 tdecoded:=dec_emulate;
406 if ENA_LEVEL0 and topcode(5 downto 0)=OPCODE_NEQBRANCH then
407 tdecoded:=dec_neq_branch;
408 elsif ENA_LEVEL0 and topcode(5 downto 0)=OPCODE_EQ then
410 elsif ENA_LEVEL0 and topcode(5 downto 0)=OPCODE_LOADB then
412 elsif ENA_LEVEL0 and topcode(5 downto 0)=OPCODE_PUSHSPADD then
413 tdecoded:=dec_push_sp_add;
414 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_LESSTHAN then
415 tdecoded:=dec_less_than;
416 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_ULESSTHAN then
417 tdecoded:=dec_u_less_than;
418 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_MULT then
420 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_STOREB then
421 tdecoded:=dec_storeb;
422 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_CALLPCREL then
423 tdecoded:=dec_call_pc_rel;
424 elsif ENA_LEVEL1 and topcode(5 downto 0)=OPCODE_SUB then
426 elsif ENA_LEVEL2 and topcode(5 downto 0)=OPCODE_LESSTHANOREQUAL then
427 tdecoded:=dec_less_than_or_equal;
428 elsif ENA_LEVEL2 and topcode(5 downto 0)=OPCODE_ULESSTHANOREQUAL then
429 tdecoded:=dec_u_less_than_or_equal;
430 elsif ENA_LEVEL2 and topcode(5 downto 0)=OPCODE_CALL then
432 elsif ENA_LEVEL2 and topcode(5 downto 0)=OPCODE_POPPCREL then
433 tdecoded:=dec_pop_pc_rel;
434 elsif ENA_LSHR and topcode(5 downto 0)=OPCODE_LSHIFTRIGHT then
437 elsif topcode(7 downto 4)=OPCODE_ADDSP then
440 elsif tsp_offset=1 then
441 tdecoded:=dec_add_top;
443 tdecoded:=dec_add_sp;
446 case topcode(3 downto 0) is
449 when OPCODE_PUSHSP =>
450 tdecoded:=dec_push_sp;
452 tdecoded:=dec_pop_pc;
468 tdecoded:=dec_pop_sp;
469 when others => -- OPCODE_NOP and others
476 insn <= tinsns(ipc_low);
477 -- once we wrap, we need to fetch
478 tinsns(0):=dec_insn_fetch;
482 -- Each instruction must:
484 -- 1. increase pc_r if applicable
485 -- 2. set next state if applicable
486 -- 3. do it's operation
488 -- Some shortcut to make the code readable:
489 inpc_low:=to_integer(next_pc(BYTE_BITS-1 downto 0));
490 ex_opcode:=opcode_r(ipc_low);
491 insn <= insns(inpc_low);
492 -- Defaults used by most instructions
493 if insn/=dec_insn_fetch and insn/=dec_im then
498 when dec_insn_fetch =>
499 -- Not a real instruction, fetch new instructions
500 DoFetch(FAST_FETCH,state,addr_r,pc_r,read_en_r,mem_busy_i);
502 -- Push(immediate value), IDIM=1
503 -- if IDIM=0 Push(signed(opcode & 0x7F)) else
504 -- Push((Pop()<<7)|(opcode&0x7F))
505 if mem_busy_i='0' then
510 -- We already started an IM sequence
512 a_r(WORD_SIZE-1 downto 7) <= a_r(WORD_SIZE-8 downto 0);
514 a_r(6 downto 0) <= ex_opcode(6 downto 0);
516 -- First IM, push the value sign extended
517 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
518 a_r <= unsigned(resize(signed(ex_opcode(6 downto 0)),WORD_SIZE));
524 if mem_busy_i='0' then
526 addr_r <= sp_r+sp_offset;
529 -- We need to fetch B
530 state <= st_store_sp2;
534 if mem_busy_i='0' then
535 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
537 -- We are flushing B cache, so we need more time to
539 state <= st_load_sp2;
542 -- Push(PC+1), PC=Opcode[4:0]*32
543 if mem_busy_i='0' then
544 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
546 a_r <= ExpandPC(pc_r+1);
548 -- The emulate address is:
550 -- 0000 00aa aaa0 0000
551 pc_r <= (others => '0');
552 pc_r(9 downto 5) <= ex_opcode(4 downto 0);
554 when dec_call_pc_rel =>
555 -- t=Pop(), Push(PC+1), PC=PC+t
556 if mem_busy_i='0' and ENA_LEVEL1 then
558 a_r <= ExpandPC(pc_r+1);
559 pc_r <= pc_r+a_r(ADDR_W-1 downto 0);
562 -- t=Pop(), Push(PC+1), PC=t
563 if mem_busy_i='0' and ENA_LEVEL2 then
565 a_r <= ExpandPC(pc_r+1);
566 pc_r <= a_r(ADDR_W-1 downto 0);
569 -- Push(Pop()+[SP+Offset])
570 if mem_busy_i='0' then
574 addr_r <= sp_r+sp_offset;
579 if mem_busy_i='0' then
580 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
582 a_r <= (others => '0');
583 a_r(ADDR_W-1 downto BYTE_BITS) <= sp_r;
588 if mem_busy_i='0' then
589 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
591 pc_r <= a_r(ADDR_W-1 downto 0);
594 when dec_pop_pc_rel =>
596 if mem_busy_i='0' and ENA_LEVEL2 then
597 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
599 pc_r <= a_r(ADDR_W-1 downto 0)+pc_r;
603 -- Push(Pop()+Pop()) [A=A+B, SP++, update B]
604 if mem_busy_i='0' then
608 addr_r <= inc_inc_sp;
612 -- a=Pop(), b=Pop(), Push(b-a)
613 if mem_busy_i='0' and ENA_LEVEL1 then
614 DoBinOp(b_r-a_r,state,sp_r,addr_r,read_en_r,
615 a_r,bin_op_res1_r,BINOP_PIPE);
619 if mem_busy_i='0' then
621 addr_r <= inc_inc_sp;
626 -- t=Pop(), Pop(), Push(t)
627 if mem_busy_i='0' then
628 -- PopDown leaves top of stack unchanged
630 addr_r <= inc_inc_sp;
635 -- Push(Pop() or Pop())
636 if mem_busy_i='0' then
640 addr_r <= inc_inc_sp;
644 -- Push(Pop() and Pop())
645 if mem_busy_i='0' then
649 addr_r <= inc_inc_sp;
653 -- a=Pop(), b=Pop(), Push(a=b ? 1 : 0)
654 if mem_busy_i='0' and ENA_LEVEL0 then
655 DoBinOpBool(a_r=b_r,state,sp_r,addr_r,read_en_r,
656 a_r,bin_op_res1_r,BINOP_PIPE);
658 when dec_u_less_than =>
659 -- a=Pop(), b=Pop(), Push(a<b ? 1 : 0)
660 if mem_busy_i='0' and ENA_LEVEL1 then
661 DoBinOpBool(a_r<b_r,state,sp_r,addr_r,read_en_r,
662 a_r,bin_op_res1_r,BINOP_PIPE);
664 when dec_u_less_than_or_equal =>
665 -- a=Pop(), b=Pop(), Push(a<=b ? 1 : 0)
666 if mem_busy_i='0' and ENA_LEVEL2 then
667 DoBinOpBool(a_r<=b_r,state,sp_r,addr_r,read_en_r,
668 a_r,bin_op_res1_r,BINOP_PIPE);
670 when dec_less_than =>
671 -- a=signed(Pop()), b=signed(Pop()), Push(a<b ? 1 : 0)
672 if mem_busy_i='0' and ENA_LEVEL1 then
673 DoBinOpBool(signed(a_r)<signed(b_r),state,sp_r,
674 addr_r,read_en_r,a_r,bin_op_res1_r,
677 when dec_less_than_or_equal =>
678 -- a=signed(Pop()), b=signed(Pop()), Push(a<=b ? 1 : 0)
679 if mem_busy_i='0' and ENA_LEVEL2 then
680 DoBinOpBool(signed(a_r)<=signed(b_r),state,sp_r,
681 addr_r,read_en_r,a_r,bin_op_res1_r,
686 if mem_busy_i='0' then
688 addr_r <= a_r(ADDR_W-1 downto BYTE_BITS);
693 -- t=Pop(), Push(t), Push(t)
694 if mem_busy_i='0' then
696 -- A is dupped, no change
698 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
700 when dec_dup_stk_b =>
701 -- Pop(), t=Pop(), Push(t), Push(t), Push(t)
702 if mem_busy_i='0' then
707 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
710 -- a=Pop(), b=Pop(), [a]=b
711 if mem_busy_i='0' then
714 addr_r <= a_r(ADDR_W-1 downto BYTE_BITS);
721 if mem_busy_i='0' then
722 FlushB(write_en_r,addr_r,inc_sp,data_o,b_r);
725 sp_r <= a_r(ADDR_W-1 downto BYTE_BITS);
736 for i in 0 to WORD_SIZE-1 loop
737 a_r(i) <= a_r(WORD_SIZE-1-i);
740 -- a=Pop(), b=Pop(), Push(b), Push(a+b)
744 -- Push(Pop()<<1) [equivalent to a=Pop(), Push(a+a)]
746 a_r(WORD_SIZE-1 downto 1) <= a_r(WORD_SIZE-2 downto 0);
748 when dec_push_sp_add =>
752 a_r <= (others => '0');
753 a_r(ADDR_W-1 downto BYTE_BITS) <=
754 a_r(ADDR_W-1-BYTE_BITS downto 0)+sp_r;
756 when dec_neq_branch =>
757 -- a=Pop(), b=Pop(), PC+=b==0 ? 1 : a
758 -- Branches are almost always taken as they form loops
761 -- Need to fetch stack again.
764 pc_r <= a_r(ADDR_W-1 downto 0)+pc_r;
778 mult_res1_r <= mult_res(WORD_SIZE-1 downto 0);
783 -- Assert the break_o signal
784 --report "Break instruction encountered" severity failure;
788 -- Push([Pop()] & 0xFF) (byte address)
789 if mem_busy_i='0' and ENA_LEVEL0 then
791 addr_r <= a_r(ADDR_W-1 downto BYTE_BITS);
796 -- [Pop()]=Pop() & 0xFF (byte address)
797 if mem_busy_i='0' and ENA_LEVEL1 then
799 addr_r <= a_r(ADDR_W-1 downto BYTE_BITS);
804 -- a=Pop(), b=Pop(), Push(b>>(a&0x3F))
806 -- This instruction takes more than one cycle.
807 -- We must avoid duplications in the trace log.
808 dbg_o.b_inst <= not_lshr;
810 if a_r(5 downto 0)=0 then -- Only 6 bits used
812 if mem_busy_i='0' then
816 addr_r <= inc_inc_sp;
820 else -- More shifts needed
821 b_r <= "0"&b_r(WORD_SIZE-1 downto 1);
822 a_r(5 downto 0) <= a_r(5 downto 0)-1;
827 -- Undefined behavior, we shouldn't get here.
828 -- It only helps synthesis tools.
829 sp_r <= (others => D_CARE_VAL);
830 report "Illegal decode instruction?!" severity failure;
833 -- The followup of operations that takes more than one execution clock
835 if mem_busy_i='0' then
841 if mem_busy_i='0' then
842 state <= st_load_sp3;
843 -- Now we can read SP+Offset (SP already decremented)
845 addr_r <= sp_r+sp_offset+1;
848 if mem_busy_i='0' then
849 -- Note: We can't increment PC in the decode stage
850 -- because it will modify sp_offset.
852 -- Finally we have the result in A
857 if mem_busy_i='0' then
862 if mem_busy_i='0' then
867 if mem_busy_i='0' then
868 a_r <= (others => '0');
869 -- Select the source bits using the less significant bits (byte address)
870 h_bit:=(WORD_BYTES-to_integer(a_r(BYTE_BITS-1 downto 0)))*8-1;
872 a_r(7 downto 0) <= data_i(h_bit downto l_bit);
876 if mem_busy_i='0' then
877 addr_r <= a_r(ADDR_W-1 downto BYTE_BITS);
879 -- Select the source bits using the less significant bits (byte address)
880 h_bit:=(WORD_BYTES-to_integer(a_r(BYTE_BITS-1 downto 0)))*8-1;
882 data_o(h_bit downto l_bit) <= b_r(7 downto 0);
888 if mem_busy_i='0' then
889 addr_r <= pc_r(ADDR_W-1 downto BYTE_BITS);
893 -- The following states can be used to leave cycles free for
894 -- tools that can automagically decompose the multiplication
895 -- in various stages. Xilinx tools can do it to increase the
896 -- multipliers performance.
904 if mem_busy_i='0' then
911 addr_r <= inc_inc_sp;
915 when st_binary_op_res =>
917 state <= st_binary_op_res2;
918 when st_binary_op_res2 =>
921 addr_r <= inc_inc_sp;
925 a_r <= bin_op_res2_r;
927 a_r <= bin_op_res1_r;
930 if mem_busy_i='0' then
931 -- Note: Moving this PC++ to the decoder seems to
932 -- consume more LUTs.
938 -- Undefined behavior, we shouldn't get here.
939 -- It only helps synthesis tools.
940 sp_r <= (others => D_CARE_VAL);
941 report "Illegal state?!" severity failure;
944 end if; -- else reset_i='1'
945 end if; -- rising_edge(clk_i)
946 end process opcode_control;
947 end architecture Behave; -- Entity: ZPUMediumCore