 * linux/arch/arm26/boot/compressed/head.S
 * Copyright (C) 1996-2002 Russell King
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
#include <linux/config.h>
#include <linux/linkage.h>
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
		.macro	debug_reloc_start
		.macro	debug_reloc_end
		.section ".start", #alloc, #execinstr
 * sort out different calling conventions
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
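/*
 * These three words form a small header that boot loaders can use to
 * recognise and place the image.  A rough C view of that layout
 * (illustrative only, not part of this file; the struct name is
 * hypothetical):
 *
 *	#include <stdint.h>
 *
 *	struct zimage_header {
 *		uint32_t magic;		// 0x016f2818, identifies a zImage
 *		uint32_t load_addr;	// absolute load/run address (start)
 *		uint32_t end_addr;	// end of the image (_edata)
 *	};
 */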
1:		mov	r7, r1			@ save architecture ID
		teqp	pc, #0x0c000003		@ turn off interrupts
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset
		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we
		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
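/*
 * A rough C sketch of the GOT fixup above (illustrative only, not part
 * of this file; got, got_end and delta are hypothetical stand-ins for
 * r6, the GOT end pointer and r0):
 *
 *	#include <stdint.h>
 *
 *	static void relocate_got(uint32_t *got, uint32_t *got_end,
 *				 uint32_t delta)
 *	{
 *		// Each GOT slot holds a link-time address; adding the
 *		// load/run delta turns it into a run-time address so
 *		// the C references keep working.
 *		while (got < got_end)
 *			*got++ += delta;
 *	}
 */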
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
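/*
 * Roughly, in C terms (illustrative only; bss_start, bss_end and the
 * 64K arena mirror r2, r3 and the constant above):
 *
 *	#include <stdint.h>
 *
 *	static void setup_c_runtime(uint32_t *bss_start, uint32_t *bss_end,
 *				    uint8_t *stack_top,
 *				    uint8_t **malloc_lo, uint8_t **malloc_hi)
 *	{
 *		// Zero the BSS so C globals start out cleared.
 *		while (bss_start < bss_end)
 *			*bss_start++ = 0;
 *
 *		// Give the decompressor a 64K malloc arena just above
 *		// the stack.
 *		*malloc_lo = stack_top;
 *		*malloc_hi = stack_top + 0x10000;
 *	}
 */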
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 *   r4 + image length <= r5 -> OK
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		mov	r5, r2			@ decompress after malloc space
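/*
 * The condition shown above, sketched in C (illustrative only; the 4MB
 * figure is the "largest kernel size" bound the code assumes, and
 * image_length stands in for it):
 *
 *	#include <stdint.h>
 *	#include <stdbool.h>
 *
 *	static bool wont_overwrite(uint32_t kernel_addr, uint32_t image_start)
 *	{
 *		uint32_t image_length = 4096 * 1024;	// 4MB upper bound
 *
 *		// The decompressed kernel would end below this image,
 *		// so writing it out cannot clobber the running code.
 *		return kernel_addr + image_length <= image_start;
 *	}
 *
 * When the check fails, the kernel is decompressed just after the
 * malloc space instead (r5 = r2 above) and relocated into place later.
 */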
		bic	r0, r0, #127		@ align the kernel length
 * r0     = decompressed kernel length
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
		add	r1, r5, r0		@ end of decompressed kernel
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		add	pc, r5, r0		@ call relocation code
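/*
 * In C terms, the loop above copies the relocation stub to just past
 * the decompressed kernel and then jumps to it (illustrative only;
 * names are hypothetical):
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void copy_and_call_reloc(uint8_t *kernel_start,
 *					uint32_t kernel_len,
 *					const uint8_t *reloc_code,
 *					uint32_t reloc_len)
 *	{
 *		// Place the relocation code after the decompressed
 *		// kernel so it survives while the kernel is moved.
 *		uint8_t *dst = kernel_start + kernel_len;
 *
 *		memcpy(dst, reloc_code, reloc_len);
 *		((void (*)(void))dst)();	// add pc, r5, r0
 *	}
 */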
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 * r4     = kernel execution address
 * r7     = architecture ID
wont_overwrite:	mov	r0, r4
		.word	__bss_start		@ r2
		.word	_load_addr		@ r4
		.word	_got_start		@ r6
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
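/*
 * The .word table above appears to be what the earlier
 * "ldmia r0, {r1, r2, r3, r4, r5, r6, ip, sp}" reads: each word lands
 * in the register named in its comment.  A hedged C picture of the
 * entries visible in this excerpt (other entries are elided):
 *
 *	#include <stdint.h>
 *
 *	struct lc_table {
 *		// ...link-time addresses loaded into the other registers...
 *		uint32_t bss_start;	// __bss_start     -> r2
 *		uint32_t load_addr;	// _load_addr      -> r4
 *		uint32_t got_start;	// _got_start      -> r6
 *		uint32_t stack_top;	// user_stack+4096 -> sp
 *	};
 */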
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
cache_on:	mov	r3, #8			@ cache_on function
__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
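/*
 * A hedged C sketch of the loop above: it builds a flat 1:1 table of
 * 1MB section descriptors, setting the cacheable/bufferable bits
 * (0x0c) only for addresses inside RAM.  The base descriptor flags and
 * the 1MB stride are elided in this excerpt, so base_flags and
 * nr_sections below are assumptions:
 *
 *	#include <stdint.h>
 *
 *	static void setup_mmu(uint32_t *pgdir, unsigned int nr_sections,
 *			      uint32_t base_flags,
 *			      uint32_t ram_start, uint32_t ram_end)
 *	{
 *		uint32_t addr = 0;
 *		unsigned int i;
 *
 *		for (i = 0; i < nr_sections; i++, addr += 0x100000) {
 *			uint32_t desc = addr | base_flags;
 *
 *			// Only the RAM window is cacheable + bufferable.
 *			if (addr >= ram_start && addr < ram_end)
 *				desc |= 0x0c;
 *			pgdir[i] = desc;	// 1:1 section mapping
 *		}
 *	}
 */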
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
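/*
 * Sketch of that fix-up (illustrative only; the surrounding lines that
 * derive the current 1MB section from the PC and store the two
 * descriptors are elided in this excerpt):
 *
 *	#include <stdint.h>
 *
 *	static void map_exec_sections(uint32_t *pgdir, uint32_t exec_addr,
 *				      uint32_t cacheable_flags)
 *	{
 *		uint32_t section = exec_addr >> 20;	// 1MB section index
 *
 *		// Map 2MB around the code we are running from as
 *		// cacheable, so execution from Flash is not uncached.
 *		pgdir[section]     = (section << 20) | cacheable_flags;
 *		pgdir[section + 1] = ((section + 1) << 20) | cacheable_flags;
 *	}
 */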
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		orr	r0, r0, #0x000d		@ Write buffer, mmu
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 * r0     = decompressed kernel length
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
reloc_start:	add	r8, r5, r0
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
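/*
 * The relocation stub, roughly, in C (illustrative only): it copies
 * the decompressed kernel from where it was written (after the malloc
 * space) to its execution address, then the kernel is entered:
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void relocate_kernel(uint8_t *exec_addr, const uint8_t *src,
 *				    uint32_t len)
 *	{
 *		// This runs from a copy placed after the decompressed
 *		// image, so overwriting the original decompressor and
 *		// its buffers is safe here.
 *		memmove(exec_addr, src, len);
 *		// ...caches are then cleaned/flushed and control passes
 *		// to exec_addr (see call_kernel below).
 *	}
 */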
call_kernel:	bl	cache_clean_flush
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		addeq	pc, r12, r3		@ call cache function
 * Table for cache operations.  This is basically:
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 * We match an entry using: ((real_id ^ match) & mask) == 0
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
		.type	proc_types,#object
		.word	0x41560600		@ ARM6/610
		b	__arm6_cache_off	@ works, but slow
		@ b	__arm6_cache_on		@ untested
		@ b	__armv3_cache_flush
		.word	0x41007000		@ ARM7/710
		.word	0x41807200		@ ARM720T (writethrough)
		.word	0x41129200		@ ARM920T
		b	__armv4_cache_flush
		.word	0x4401a100		@ sa110 / sa1100
		b	__armv4_cache_flush
		.word	0x6901b110		@ sa1110
		b	__armv4_cache_flush
		.word	0x69050000		@ xscale
		b	__armv4_cache_flush
		.word	0			@ unrecognised type
		.size	proc_types, . - proc_types
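/*
 * In C terms, each proc_types entry pairs a processor-ID match value
 * and mask with branches to the 'on', 'off' and 'flush' methods, and
 * call_cache_fn walks the table until ((real_id ^ match) & mask) == 0
 * holds.  An illustrative sketch (the struct is a C approximation of
 * the entry layout, not the literal table format):
 *
 *	#include <stdint.h>
 *
 *	struct proc_type {
 *		uint32_t match;		// expected processor ID bits
 *		uint32_t mask;		// which ID bits must match
 *		void (*cache_on)(void);
 *		void (*cache_off)(void);
 *		void (*cache_flush)(void);
 *	};
 *
 *	static const struct proc_type *find_proc(const struct proc_type *t,
 *						 uint32_t real_id)
 *	{
 *		// The final catch-all entry ("unrecognised type")
 *		// matches every processor, so the walk terminates.
 *		while (((real_id ^ t->match) & t->mask) != 0)
 *			t++;
 *		return t;
 *	}
 */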
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
cache_off:	mov	r3, #12			@ cache_off function
		mrc	p15, 0, r0, c1, c0
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	r0, #0x00000030		@ ARM6 control reg.
		mov	r0, #0x00000070		@ ARM7 control reg.
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
 * Clean and flush the cache to maintain consistency.
 *  r1, r2, r3, r12 corrupted
 * This routine must preserve:
		add	r2, r1, #65536		@ 2x the largest dcache size
1:		ldr	r12, [r1], #32		@ s/w flush D cache
		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
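/*
 * The load loop above cleans a writeback D-cache by software: reading
 * one word per cache line across twice the largest D-cache size forces
 * dirty lines to be evicted (written back).  Illustratively, in C:
 *
 *	#include <stdint.h>
 *
 *	static void sw_clean_dcache(const volatile uint32_t *base)
 *	{
 *		const volatile uint32_t *p = base;
 *		const volatile uint32_t *end =
 *			(const volatile uint32_t *)((uintptr_t)base + 65536);
 *
 *		// One load per 32-byte line; the loads do the work by
 *		// displacing dirty lines, the values are discarded.
 *		while (p < end) {
 *			(void)*p;
 *			p = (const volatile uint32_t *)((uintptr_t)p + 32);
 *		}
 *	}
 */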
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
		.type	phexbuf,#object
		.size	phexbuf, . - phexbuf
phex:		adr	r3, phexbuf
2:		mov	r0, r11, lsl #2
		ldr	r0, [r12, r11, lsl #2]
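/*
 * A hedged C sketch of what phex does: format a value as hex digits
 * into phexbuf and hand the string to the low-level output routine.
 * Most of the routine is elided in this excerpt, so this is only an
 * approximation:
 *
 *	#include <stdint.h>
 *
 *	static void phex(uint32_t val, int digits, char *buf,
 *			 void (*putstr)(const char *))
 *	{
 *		static const char hex[] = "0123456789abcdef";
 *
 *		buf[digits] = '\0';
 *		while (digits--) {
 *			buf[digits] = hex[val & 0xf];
 *			val >>= 4;
 *		}
 *		putstr(buf);
 *	}
 */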
		.section ".stack", "aw"
user_stack:	.space	4096