add: performance values for Lattice MachXO2
[zpu.git] / zpu / hdl / zealot / zpu_small.vhdl
blob056b9249b4a18473fc2f03da69a72e5c11bc264a
1 ------------------------------------------------------------------------------
2 ----                                                                      ----
3 ----  ZPU Small                                                           ----
4 ----                                                                      ----
5 ----  http://www.opencores.org/                                           ----
6 ----                                                                      ----
7 ----  Description:                                                        ----
8 ----  ZPU is a 32 bits small stack cpu. This is the small size version.   ----
9 ----  It doesn't support external memories, needs a dual ported memory.   ----
10 ----                                                                      ----
11 ----  To Do:                                                              ----
12 ----  -                                                                   ----
13 ----                                                                      ----
14 ----  Author:                                                             ----
15 ----    - Øyvind Harboe, oyvind.harboe zylin.com                          ----
16 ----    - Salvador E. Tropea, salvador inti.gob.ar                        ----
17 ----                                                                      ----
18 ------------------------------------------------------------------------------
19 ----                                                                      ----
20 ---- Copyright (c) 2008 Øyvind Harboe <oyvind.harboe zylin.com>           ----
21 ---- Copyright (c) 2008 Salvador E. Tropea <salvador inti.gob.ar>         ----
22 ---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial       ----
23 ----                                                                      ----
24 ---- Distributed under the BSD license                                    ----
25 ----                                                                      ----
26 ------------------------------------------------------------------------------
27 ----                                                                      ----
28 ---- Design unit:      ZPUSmallCore(Behave) (Entity and architecture)     ----
29 ---- File name:        zpu_small.vhdl                                     ----
30 ---- Note:             None                                               ----
31 ---- Limitations:      None known                                         ----
32 ---- Errors:           None known                                         ----
33 ---- Library:          zpu                                                ----
34 ---- Dependencies:     IEEE.std_logic_1164                                ----
35 ----                   IEEE.numeric_std                                   ----
36 ----                   zpu.zpupkg                                         ----
37 ---- Target FPGA:      Spartan 3 (XC3S1500-4-FG456)                       ----
38 ---- Language:         VHDL                                               ----
39 ---- Wishbone:         No                                                 ----
40 ---- Synthesis tools:  Xilinx Release 9.2.03i - xst J.39                  ----
41 ---- Simulation tools: GHDL [Sokcho edition] (0.2x)                       ----
42 ---- Text editor:      SETEdit 0.5.x                                      ----
43 ----                                                                      ----
44 ------------------------------------------------------------------------------
46 library IEEE;
47 use IEEE.std_logic_1164.ALL;
48 use IEEE.numeric_std.all;
50 library zpu;
51 use zpu.zpupkg.all;
53 entity ZPUSmallCore is -- ZPU small core: stack CPU wired to a dual ported BRAM and a memory mapped I/O bus
54    generic(
55       WORD_SIZE    : integer:=32;  -- Data width 16/32
56       ADDR_W       : integer:=16;  -- Total address space width (incl. I/O)
57       MEM_W        : integer:=15;  -- Memory (prog+data+stack) width
58       D_CARE_VAL   : std_logic:='X'); -- Value used to fill the unused bits
59    port(
60       clk_i        : in  std_logic; -- System Clock
61       reset_i      : in  std_logic; -- Synchronous Reset
62       interrupt_i  : in  std_logic; -- Interrupt request (checked before each instruction is executed)
63       break_o      : out std_logic; -- Breakpoint opcode executed
64       dbg_o        : out zpu_dbgo_t; -- Debug outputs (i.e. trace log)
65       -- BRAM (text, data, bss and stack)
66       a_we_o       : out std_logic; -- BRAM A port Write Enable
67       a_addr_o     : out unsigned(MEM_W-1 downto WORD_SIZE/16):=(others => '0'); -- BRAM A Address
68       a_o          : out unsigned(WORD_SIZE-1 downto 0):=(others => '0'); -- Data to BRAM A port
69       a_i          : in  unsigned(WORD_SIZE-1 downto 0); -- Data from BRAM A port
70       b_we_o       : out std_logic; -- BRAM B port Write Enable
71       b_addr_o     : out unsigned(MEM_W-1 downto WORD_SIZE/16):=(others => '0'); -- BRAM B Address
72       b_o          : out unsigned(WORD_SIZE-1 downto 0):=(others => '0'); -- Data to BRAM B port
73       b_i          : in  unsigned(WORD_SIZE-1 downto 0); -- Data from BRAM B port
74       -- Memory mapped I/O
75       mem_busy_i   : in  std_logic; -- I/O busy: the core stays in its I/O wait states while this is '1'
76       data_i       : in  unsigned(WORD_SIZE-1 downto 0); -- Data read from the I/O space
77       data_o       : out unsigned(WORD_SIZE-1 downto 0); -- Data written to the I/O space (driven from b_i)
78       addr_o       : out unsigned(ADDR_W-1 downto 0); -- I/O space address
79       write_en_o   : out std_logic; -- I/O write strobe, '1' for one clock per I/O store
80       read_en_o    : out std_logic); -- I/O read strobe, '1' for one clock per I/O load
81 end entity ZPUSmallCore;
83 architecture Behave of ZPUSmallCore is
84    constant MAX_ADDR_BIT : integer:=ADDR_W-2; -- MSB of PC/SP; the bit above it (IO_BIT) flags the I/O space
85    constant BYTE_BITS    : integer:=WORD_SIZE/16; -- # of bits in a word that addresses bytes
86    -- Stack Pointer initial value: BRAM size-8
87    constant SP_START_1   : unsigned(ADDR_W-1 downto 0):=to_unsigned((2**MEM_W)-8,ADDR_W);
88    constant SP_START     : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=
89                            SP_START_1(MAX_ADDR_BIT downto BYTE_BITS); -- Same value as a word address
90    constant IO_BIT       : integer:=ADDR_W-1; -- Address bit to determine this is an I/O
92    -- Program counter
93    signal pc_r           : unsigned(MAX_ADDR_BIT downto 0):=(others => '0'); -- Byte address
94    -- Stack pointer
95    signal sp_r           : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=SP_START; -- Word address
96    signal idim_r         : std_logic:='0'; -- '1' when the previously executed instruction was an IM
98    -- BRAM (text, data, bss and stack)
99    -- a_r is a register for the top of the stack [SP]
100    -- Note: as this is a stack CPU this is a very important register.
101    signal a_we_r         : std_logic:='0';
102    signal a_addr_r       : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0');
103    signal a_r            : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
104    -- b_r is a register for the next value in the stack [SP+1]
105    -- We also use the B port to fetch instructions.
106    signal b_we_r         : std_logic:='0';
107    signal b_addr_r       : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0');
108    signal b_r            : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
110    -- State machine.
111    type state_t is (st_fetch, st_write_io_done, st_execute, st_add, st_or,
112                     st_and, st_store, st_read_io, st_write_io, st_fetch_next,
113                     st_add_sp, st_decode, st_resync);
114    signal state          : state_t:=st_resync; -- st_resync is also the state loaded on reset
116    -- Decoded Opcode
117    type decode_t is (dec_nop, dec_im, dec_load_sp, dec_store_sp, dec_add_sp,
118                      dec_emulate, dec_break, dec_push_sp, dec_pop_pc, dec_add,
119                      dec_or, dec_and, dec_load, dec_not, dec_flip, dec_store,
120                      dec_pop_sp, dec_interrupt);
121    signal d_opcode_r     : decode_t; -- Registered d_opcode (or dec_interrupt when an IRQ is taken)
122    signal d_opcode       : decode_t; -- Combinational output of decode_control
124    signal opcode         : unsigned(OPCODE_W-1 downto 0); -- Opcode byte selected by decode_control
125    signal opcode_r       : unsigned(OPCODE_W-1 downto 0); -- Registered
127    -- IRQ flag
128    signal in_irq_r       : std_logic:='0'; -- Set when an interrupt is taken, cleared while interrupt_i='0'
129    -- I/O space address
130    signal addr_r         : unsigned(ADDR_W-1 downto 0):=(others => '0'); -- Drives addr_o
131 begin
132    -- Dual ported memory interface
       -- The internal address registers span MAX_ADDR_BIT downto BYTE_BITS;
       -- only the MEM_W-1 downto BYTE_BITS part is exported to the BRAM, so
       -- MEM_W-1 must not exceed MAX_ADDR_BIT.
133    a_we_o    <= a_we_r;
134    a_addr_o  <= a_addr_r(MEM_W-1 downto BYTE_BITS);
135    a_o       <= a_r;
136    b_we_o    <= b_we_r;
137    b_addr_o  <= b_addr_r(MEM_W-1 downto BYTE_BITS);
138    b_o       <= b_r;
140    -------------------------
141    -- Instruction Decoder --
142    -------------------------
143    -- Note: We use Port B memory to fetch the opcodes.
144    decode_control:
145    process(b_i, pc_r)
146       variable topcode : unsigned(OPCODE_W-1 downto 0);
147    begin
148       -- Select the addressed byte inside the fetched word
149       case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is
150            when 0 =>
151                 topcode := to_01( b_i(31 downto 24));
152            when 1 =>
153                 topcode := to_01( b_i(23 downto 16));
154            when 2 =>
155                 topcode := to_01( b_i(15 downto 8));
156            when others => -- 3
157                 topcode := to_01( b_i(7 downto 0));
158       end case;
159       opcode <= topcode;
161       if (topcode(7 downto 7)=OPCODE_IM) then
162          d_opcode <= dec_im;
163       elsif (topcode(7 downto 5)=OPCODE_STORESP) then
164          d_opcode <= dec_store_sp;
165       elsif (topcode(7 downto 5)=OPCODE_LOADSP) then
166          d_opcode <= dec_load_sp;
167       elsif (topcode(7 downto 5)=OPCODE_EMULATE) then
168          d_opcode <= dec_emulate;
169       elsif (topcode(7 downto 4)=OPCODE_ADDSP) then
170          d_opcode <= dec_add_sp;
171       else -- OPCODE_SHORT
172          case topcode(3 downto 0) is
173               when OPCODE_BREAK =>
174                    d_opcode <= dec_break;
175               when OPCODE_PUSHSP =>
176                    d_opcode <= dec_push_sp;
177               when OPCODE_POPPC =>
178                    d_opcode <= dec_pop_pc;
179               when OPCODE_ADD =>
180                    d_opcode <= dec_add;
181               when OPCODE_OR =>
182                    d_opcode <= dec_or;
183               when OPCODE_AND =>
184                    d_opcode <= dec_and;
185               when OPCODE_LOAD =>
186                    d_opcode <= dec_load;
187               when OPCODE_NOT =>
188                    d_opcode <= dec_not;
189               when OPCODE_FLIP =>
190                    d_opcode <= dec_flip;
191               when OPCODE_STORE =>
192                    d_opcode <= dec_store;
193               when OPCODE_POPSP =>
194                    d_opcode <= dec_pop_sp;
195               when others => -- OPCODE_NOP and others
196                    d_opcode <= dec_nop;
197          end case;
198       end if;
199    end process decode_control;
201    data_o <= b_i; -- I/O write data is always the word currently read on BRAM port B
       -- Main control process: a clocked state machine that fetches opcodes
       -- through BRAM port B, keeps the top of the stack on port A and
       -- executes the registered decoded opcode (d_opcode_r) in st_execute.
       -- Signals assigned more than once in a clock take the LAST value
       -- assigned, so the defaults below are overridden inside the case.
202    opcode_control:
203    process (clk_i)
204       variable sp_offset : unsigned(4 downto 0); -- SP offset built from the opcode in st_execute
205    begin
206       if rising_edge(clk_i) then
             -- These default to '0' on every clock, so each one is high for
             -- a single cycle when set below
207          break_o      <= '0';
208          write_en_o   <= '0';
209          read_en_o    <= '0';
210          dbg_o.b_inst <= '0';
211          if reset_i='1' then
212             state    <= st_resync;
213             sp_r     <= SP_START;
214             pc_r     <= (others => '0');
215             idim_r   <= '0';
216             a_addr_r <= (others => '0');
217             b_addr_r <= (others => '0');
218             a_we_r   <= '0';
219             b_we_r   <= '0';
220             a_r      <= (others => '0');
221             b_r      <= (others => '0');
222             in_irq_r <= '0';
223             addr_r   <= (others => '0');
224          else -- reset_i/='1'
225             a_we_r <= '0';
226             b_we_r <= '0';
227             -- This saves LUTs, by explicitly declaring that the
228             -- a_o can be left at whatever value if a_we_r is
229             -- not set.
230             a_r <= (others => D_CARE_VAL);
231             b_r <= (others => D_CARE_VAL);
232             sp_offset:=(others => D_CARE_VAL);
233             a_addr_r   <= (others => D_CARE_VAL);
234             b_addr_r   <= (others => D_CARE_VAL);
                -- By default the I/O address follows the port A read data.
                -- NOTE(review): this reload also happens on every cycle spent
                -- in st_read_io/st_write_io_done, so addr_o is not explicitly
                -- held while mem_busy_i='1' - confirm the I/O side latches
                -- the address on read_en_o/write_en_o.
235             addr_r     <= a_i(ADDR_W-1 downto 0);
                -- Register the decoder outputs; st_decode may replace
                -- d_opcode_r with dec_interrupt
236             d_opcode_r <= d_opcode;
237             opcode_r   <= opcode;
238             if interrupt_i='0' then
239                in_irq_r <= '0'; -- no longer in an interrupt
240             end if;
241    
242             case state is
243                  when st_execute =>
                          -- Default next state; some opcodes override it below
244                       state <= st_fetch;
245                       -- At this point:
246                       -- b_i contains opcode word
247                       -- a_i contains top of stack
                          -- Default PC increment; jumps override it below
248                       pc_r <= pc_r+1;
249           
250                       -- Debug info (Trace)
251                       dbg_o.b_inst <= '1';
252                       dbg_o.pc <= (others => '0');
253                       dbg_o.pc(MAX_ADDR_BIT downto 0) <= pc_r;
254                       dbg_o.opcode <= opcode_r;
255                       dbg_o.sp <= (others => '0');
256                       dbg_o.sp(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r;
257                       dbg_o.stk_a <= a_i;
258                       dbg_o.stk_b <= b_i;
259        
260                       -- During the next cycle we'll be reading the next opcode
                          -- Offset = opcode bits 3..0 with bit 4 inverted
261                       sp_offset(4):=not opcode_r(4);
262                       sp_offset(3 downto 0):=opcode_r(3 downto 0);
263           
                          -- Cleared by default, only dec_im sets it again
264                       idim_r <= '0';
266                       --------------------
267                       -- Execution Unit --
268                       --------------------
269                       case d_opcode_r is
270                            when dec_interrupt =>
271                                 -- Not a real instruction, but an interrupt
272                                 -- Push(PC); PC=32
273                                 sp_r      <= sp_r-1;
274                                 a_addr_r  <= sp_r-1;
275                                 a_we_r    <= '1';
276                                 a_r       <= (others => D_CARE_VAL);
277                                 a_r(MAX_ADDR_BIT downto 0) <= pc_r;
278                                 -- Jump to ISR
279                                 pc_r <= to_unsigned(32,MAX_ADDR_BIT+1); -- interrupt address
280                                 --report "ZPU jumped to interrupt!" severity note;
281                            when dec_im =>
282                                 idim_r <= '1';
283                                 a_we_r <= '1';
284                                 if idim_r='0' then
285                                    -- First IM
286                                    -- Push the 7 bits (extending the sign)
287                                    sp_r     <= sp_r-1;
288                                    a_addr_r <= sp_r-1;
289                                    a_r <= unsigned(resize(signed(opcode_r(6 downto 0)),WORD_SIZE));
290                                 else
291                                    -- Next IMs, shift the word and put the new value in the lower
292                                    -- bits
293                                    a_addr_r <= sp_r;
294                                    a_r(WORD_SIZE-1 downto 7) <= a_i(WORD_SIZE-8 downto 0);
295                                    a_r(6 downto 0) <= opcode_r(6 downto 0);
296                                 end if;
297                            when dec_store_sp =>
298                                 -- [SP+Offset]=Pop()
299                                 b_we_r   <= '1';
300                                 b_addr_r <= sp_r+sp_offset;
301                                 b_r      <= a_i;
302                                 sp_r     <= sp_r+1;
303                                 state    <= st_resync;
304                            when dec_load_sp =>
305                                 -- Push([SP+Offset])
                                    -- Only the read address is issued here; st_fetch_next
                                    -- later writes the word arriving on a_i to the new [SP]
306                                 sp_r     <= sp_r-1;
307                                 a_addr_r <= sp_r+sp_offset;
308                            when dec_emulate =>
309                                 -- Push(PC+1), PC=Opcode[4:0]*32
310                                 sp_r     <= sp_r-1;
311                                 a_we_r   <= '1';
312                                 a_addr_r <= sp_r-1;
313                                 a_r <= (others => D_CARE_VAL);
314                                 a_r(MAX_ADDR_BIT downto 0) <= pc_r+1;
315                                 -- Jump to NUM*32
316                                 -- The emulate address is:
317                                 --        98 7654 3210
318                                 -- 0000 00aa aaa0 0000
319                                 pc_r <= (others => '0');
320                                 pc_r(9 downto 5) <= opcode_r(4 downto 0);
321                            when dec_add_sp =>
322                                 -- Push(Pop()+[SP+Offset])
                                    -- Issue both reads, the sum is written in st_add
323                                 a_addr_r <= sp_r;
324                                 b_addr_r <= sp_r+sp_offset;
325                                 state    <= st_add_sp;
326                            when dec_break =>
327                                 --report "Break instruction encountered" severity failure;
328                                 break_o <= '1';
329                            when dec_push_sp =>
330                                 -- Push(SP)
331                                 sp_r     <= sp_r-1;
332                                 a_we_r   <= '1';
333                                 a_addr_r <= sp_r-1;
334                                 a_r <= (others => D_CARE_VAL);
335                                 a_r(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r;
336                            when dec_pop_pc =>
337                                 -- Pop(PC)
338                                 pc_r  <= a_i(MAX_ADDR_BIT downto 0);
339                                 sp_r  <= sp_r+1;
340                                 state <= st_resync;
341                            when dec_add =>
342                                 -- Push(Pop()+Pop())
343                                 sp_r  <= sp_r+1;
344                                 state <= st_add;
345                            when dec_or =>
346                                 -- Push(Pop() or Pop())
347                                 sp_r  <= sp_r+1;
348                                 state <= st_or;
349                            when dec_and =>
350                                 -- Push(Pop() and Pop())
351                                 sp_r  <= sp_r+1;
352                                 state <= st_and;
353                            when dec_load =>
354                                 -- Push([Pop()])
                                    -- The address on top of the stack (a_i) selects I/O
                                    -- space when its IO_BIT is set, BRAM otherwise
355                                 if a_i(IO_BIT)='1' then
356                                    addr_r    <= a_i(ADDR_W-1 downto 0);
357                                    read_en_o <= '1';
358                                    state     <= st_read_io;
359                                 else
360                                    a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
361                                 end if;
362                            when dec_not =>
363                                 -- Push(not(Pop()))
364                                 a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS);
365                                 a_we_r   <= '1';
366                                 a_r      <= not a_i;
367                            when dec_flip =>
368                                 -- Push(flip(Pop()))
                                    -- Bit reversal: bit i takes bit WORD_SIZE-1-i
369                                 a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS);
370                                 a_we_r   <= '1';
371                                 for i in 0 to WORD_SIZE-1 loop
372                                    a_r(i) <= a_i(WORD_SIZE-1-i);
373                                 end loop;
374                            when dec_store =>
375                                 -- a=Pop(), b=Pop(), [a]=b
                                    -- First pop here, the second one is done by
                                    -- st_write_io or st_store
376                                 b_addr_r <= sp_r+1;
377                                 sp_r     <= sp_r+1;
378                                 if a_i(IO_BIT)='1' then
379                                    state <= st_write_io;
380                                 else
381                                    state <= st_store;
382                                 end if;
383                            when dec_pop_sp =>
384                                 -- SP=Pop()
385                                 sp_r  <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
386                                 state <= st_resync;
387                            when dec_nop =>
388                                 -- Default, keep addressing the top of the stack (A)
389                                 a_addr_r <= sp_r;
390                            when others =>
391                                 null;
392                       end case;
393                  when st_read_io =>
                          -- I/O load: keep addressing [SP] so the value read from
                          -- data_i overwrites the top of the stack
394                       a_addr_r <= sp_r;
395                       -- Wait until memory I/O isn't busy
396                       if mem_busy_i='0' then
397                          state  <= st_fetch;
398                          a_we_r <= '1';
399                          a_r    <= data_i;
400                       end if;
401                  when st_write_io =>
402                       -- [A]=B
                          -- Second pop of the store; the written data is b_i,
                          -- which drives data_o outside this process
403                       sp_r       <= sp_r+1;
404                       write_en_o <= '1';
405                       addr_r     <= a_i(ADDR_W-1 downto 0);
406                       state      <= st_write_io_done;
407                  when st_write_io_done =>
408                       -- Wait until memory I/O isn't busy
409                       if mem_busy_i='0' then
410                          state <= st_resync;
411                       end if;
412                  when st_fetch =>
413                       -- We need to resync. During the *next* cycle
414                       -- we'll fetch the opcode @ pc and thus it will
415                       -- be available for st_execute the cycle after
416                       -- next
417                       b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS);
418                       state    <= st_fetch_next;
419                  when st_fetch_next =>
420                       -- At this point a_i contains the value that is either
421                       -- from the top of stack or should be copied to the top of the stack
422                       a_we_r   <= '1';
423                       a_r      <= a_i;
424                       a_addr_r <= sp_r;
425                       b_addr_r <= sp_r+1;
426                       state    <= st_decode;
427                  when st_decode =>
                          -- An interrupt is only taken when not already inside one
                          -- and not in the middle of an IM sequence
428                       if interrupt_i='1' and in_irq_r='0' and idim_r='0' then
429                          -- We got an interrupt, execute interrupt instead of next instruction
430                          in_irq_r   <= '1';
431                          d_opcode_r <= dec_interrupt;
432                       end if;
433                       -- during the st_execute cycle we'll be fetching SP+1
434                       a_addr_r <= sp_r;
435                       b_addr_r <= sp_r+1;
436                       state    <= st_execute;
437                  when st_store =>
                          -- Memory store: write b_i to the address held in a_i
                          -- and do the second pop
438                       sp_r     <= sp_r+1;
439                       a_we_r   <= '1';
440                       a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
441                       a_r      <= b_i;
442                       state    <= st_resync;
443                  when st_add_sp =>
                          -- Only moves on to st_add; presumably one wait cycle so
                          -- the reads issued for ADDSP are on a_i/b_i - TODO confirm
444                       state <= st_add;
445                  when st_add =>
                          -- Write a_i+b_i to the top of the stack
446                       a_addr_r <= sp_r;
447                       a_we_r   <= '1';
448                       a_r      <= a_i+b_i;
449                       state    <= st_fetch;
450                  when st_or =>
                          -- Write (a_i or b_i) to the top of the stack
451                       a_addr_r <= sp_r;
452                       a_we_r   <= '1';
453                       a_r      <= a_i or b_i;
454                       state    <= st_fetch;
455                  when st_and =>
                          -- Write (a_i and b_i) to the top of the stack
456                       a_addr_r <= sp_r;
457                       a_we_r   <= '1';
458                       a_r      <= a_i and b_i;
459                       state    <= st_fetch;
460                  when st_resync =>
                          -- Address the top of the stack again on port A before
                          -- restarting the fetch sequence
461                       a_addr_r <= sp_r;
462                       state    <= st_fetch;
463                  when others =>
464                       null;
465             end case;
466          end if; -- else reset_i/='1'
467       end if; -- rising_edge(clk_i)
468    end process opcode_control;
469    addr_o <= addr_r; -- I/O address output is the registered addr_r
471 end architecture Behave; -- Entity: ZPUSmallCore