 * linux/arch/arm26/boot/compressed/head.S
 *
 * Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.

#include <linux/linkage.h>
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
		.macro	debug_reloc_start
		.macro	debug_reloc_end

		.section ".start", #alloc, #execinstr
 * sort out different calling conventions
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		teqp	pc, #0x0c000003		@ turn off interrupts
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we

		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
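/*
 * A hedged C sketch of what the GOT loop above does, for illustration
 * only (not part of the build).  The made-up helper, "_got_end" and
 * "delta" stand in for the ip and r0 values set up earlier; only
 * _got_start appears in the address table below.
 *
 *	extern unsigned long _got_start[], _got_end[];
 *
 *	static void relocate_got(unsigned long delta)
 *	{
 *		unsigned long *entry;
 *
 *		// Each GOT entry holds a link-time address; adding the
 *		// load/run delta fixes up the C references.
 *		for (entry = _got_start; entry < _got_end; entry++)
 *			*entry += delta;
 *	}
 */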
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 *   r4 + image length <= r5 -> OK
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		mov	r5, r2			@ decompress after malloc space
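/*
 * In C terms the overwrite check amounts to roughly the following
 * (hedged sketch only, with a made-up helper name; the other names
 * mirror the register comments above and the 4MB constant is the
 * worst-case kernel size used above):
 *
 *	#include <stdbool.h>
 *
 *	#define MAX_KERNEL_SIZE	(4096 * 1024)	// 4MB largest kernel size
 *
 *	// final_addr = r4 (final kernel address), image_start = r5
 *	static bool must_relocate(unsigned long final_addr,
 *				  unsigned long image_start)
 *	{
 *		// "r4 + image length <= r5 -> OK": if the worst-case
 *		// decompressed kernel still ends below this image we can
 *		// decompress straight to the final address; otherwise we
 *		// decompress after the malloc space and relocate later.
 *		return final_addr + MAX_KERNEL_SIZE > image_start;
 *	}
 */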
		bic	r0, r0, #127		@ align the kernel length
 * r0     = decompressed kernel length
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
		add	r1, r5, r0		@ end of decompressed kernel
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		add	pc, r5, r0		@ call relocation code
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
wont_overwrite:	mov	r0, r4
		.word	__bss_start		@ r2
		.word	_load_addr		@ r4
		.word	_got_start		@ r6
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 *
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
cache_on:	mov	r3, #8			@ cache_on function

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
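/*
 * The loop above builds a flat 1:1 section map.  A hedged C sketch of
 * the same idea (illustration only, with a made-up helper; the 16K
 * directory of 4096 1MB sections follows from the "Page directory
 * size" above, and the base section flags held in r1 are taken as
 * given):
 *
 *	#include <stdint.h>
 *
 *	#define CB_BITS		0x0cU	// cacheable + bufferable
 *	#define NUM_SECTIONS	4096	// 16K directory, 1MB per entry
 *
 *	// pgdir = r3, base_flags = low bits of r1, ram_start/end = r8/r9
 *	static void setup_mmu(uint32_t *pgdir, uint32_t base_flags,
 *			      uint32_t ram_start, uint32_t ram_end)
 *	{
 *		uint32_t i, virt, entry;
 *
 *		for (i = 0; i < NUM_SECTIONS; i++) {
 *			virt = i << 20;			// 1MB sections
 *			entry = virt | base_flags;
 *			// cacheable/bufferable for the RAM area only
 *			if (virt >= ram_start && virt < ram_end)
 *				entry |= CB_BITS;
 *			else
 *				entry &= ~CB_BITS;
 *			pgdir[i] = entry;		// 1:1 mapping
 *		}
 *	}
 */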
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable

		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		orr	r0, r0, #0x000d		@ Write buffer, mmu

		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
reloc_start:	add	r8, r5, r0
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}

call_kernel:	bl	cache_clean_flush
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		addeq	pc, r12, r3		@ call cache function
 * Table for cache operations.  This is basically:
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
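/*
 * A hedged C sketch of the lookup done by call_cache_fn above
 * (illustration only; the entry layout follows the description above,
 * function pointers stand in for the branch instructions, and the
 * terminating "unrecognised type" entry is assumed to carry a mask of
 * 0 so that it matches any processor ID):
 *
 *	struct proc_type {
 *		unsigned int	match;		// CPU ID value
 *		unsigned int	mask;		// CPU ID mask
 *		void		(*cache_on)(void);
 *		void		(*cache_off)(void);
 *		void		(*cache_flush)(void);
 *	};
 *
 *	extern struct proc_type proc_types[];
 *
 *	static struct proc_type *find_proc_type(unsigned int real_id)
 *	{
 *		struct proc_type *p = proc_types;
 *
 *		// We match an entry using: ((real_id ^ match) & mask) == 0
 *		while (((real_id ^ p->match) & p->mask) != 0)
 *			p++;
 *		return p;
 *	}
 */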
		.type	proc_types,#object
		.word	0x41560600		@ ARM6/610
		b	__arm6_cache_off	@ works, but slow
@		b	__arm6_cache_on		@ untested
@		b	__armv3_cache_flush

		.word	0x41007000		@ ARM7/710

		.word	0x41807200		@ ARM720T (writethrough)

		.word	0x41129200		@ ARM920T
		b	__armv4_cache_flush

		.word	0x4401a100		@ sa110 / sa1100
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		b	__armv4_cache_flush

		.word	0x69050000		@ xscale
		b	__armv4_cache_flush

		.word	0			@ unrecognised type

		.size	proc_types, . - proc_types
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
cache_off:	mov	r3, #12			@ cache_off function

		mrc	p15, 0, r0, c1, c0
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4

		mov	r0, #0x00000030		@ ARM6 control reg.

		mov	r0, #0x00000070		@ ARM7 control reg.

		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
 * Clean and flush the cache to maintain consistency.
 *
 *  r1, r2, r3, r12 corrupted
 * This routine must preserve:
		add	r2, r1, #65536		@ 2x the largest dcache size
1:		ldr	r12, [r1], #32		@ s/w flush D cache

		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB

		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
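/*
 * For the writeback caches, the s/w flush above just streams reads
 * through a region twice the size of the largest D-cache, displacing
 * every dirty line.  A hedged C equivalent (illustration only, with a
 * made-up helper; line size and region size are taken from the code
 * above, and the base address is arbitrary):
 *
 *	#define CACHE_LINE	32		// bytes per cache line
 *	#define FLUSH_SIZE	65536		// 2x the largest dcache size
 *
 *	static void software_clean_dcache(const volatile unsigned char *base)
 *	{
 *		unsigned int off;
 *
 *		// each load allocates a fresh line, forcing writeback of
 *		// whatever dirty line it replaces
 *		for (off = 0; off < FLUSH_SIZE; off += CACHE_LINE)
 *			(void)base[off];
 *	}
 */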
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
		.type	phexbuf,#object
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf

2:		mov	r0, r11, lsl #2

		ldr	r0, [r12, r11, lsl #2]
		.section ".stack", "aw"
user_stack:	.space	4096