2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/nodemask.h>
44 #include <video/edid.h>
46 #include <asm/mpspec.h>
47 #include <asm/setup.h>
48 #include <asm/arch_hooks.h>
49 #include <asm/sections.h>
50 #include <asm/io_apic.h>
53 #include "setup_arch_pre.h"
54 #include <bios_ebda.h>
56 /* This value is set up by the early boot code to point to the value
57 immediately after the boot time page tables. It contains a *physical*
58 address, and must not be in the .bss segment! */
59 unsigned long init_pg_tables_end __initdata
= ~0UL;
61 int disable_pse __initdata
= 0;
69 EXPORT_SYMBOL(efi_enabled
);
72 /* cpu data as detected by the assembly code in head.S */
73 struct cpuinfo_x86 new_cpu_data __initdata
= { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
74 /* common cpu data for all cpus */
75 struct cpuinfo_x86 boot_cpu_data
= { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
77 unsigned long mmu_cr4_features
;
79 #ifdef CONFIG_ACPI_INTERPRETER
80 int acpi_disabled
= 0;
82 int acpi_disabled
= 1;
84 EXPORT_SYMBOL(acpi_disabled
);
86 #ifdef CONFIG_ACPI_BOOT
87 int __initdata acpi_force
= 0;
88 extern acpi_interrupt_flags acpi_sci_flags
;
91 /* for MCA, but anyone else can use it if they want */
92 unsigned int machine_id
;
93 unsigned int machine_submodel_id
;
94 unsigned int BIOS_revision
;
95 unsigned int mca_pentium_flag
;
97 /* For PCI or other memory-mapped resources */
98 unsigned long pci_mem_start
= 0x10000000;
100 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
103 /* user-defined highmem size */
104 static unsigned int highmem_pages
= -1;
109 struct drive_info_struct
{ char dummy
[32]; } drive_info
;
110 struct screen_info screen_info
;
111 struct apm_info apm_info
;
112 struct sys_desc_table_struct
{
113 unsigned short length
;
114 unsigned char table
[0];
116 struct edid_info edid_info
;
117 struct ist_info ist_info
;
120 extern void early_cpu_init(void);
121 extern void dmi_scan_machine(void);
122 extern void generic_apic_probe(char *);
123 extern int root_mountflags
;
125 unsigned long saved_videomode
;
127 #define RAMDISK_IMAGE_START_MASK 0x07FF
128 #define RAMDISK_PROMPT_FLAG 0x8000
129 #define RAMDISK_LOAD_FLAG 0x4000
131 static char command_line
[COMMAND_LINE_SIZE
];
133 unsigned char __initdata boot_params
[PARAM_SIZE
];
135 static struct resource data_resource
= {
136 .name
= "Kernel data",
139 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
142 static struct resource code_resource
= {
143 .name
= "Kernel code",
146 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
149 static struct resource system_rom_resource
= {
150 .name
= "System ROM",
153 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
156 static struct resource extension_rom_resource
= {
157 .name
= "Extension ROM",
160 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
163 static struct resource adapter_rom_resources
[] = { {
164 .name
= "Adapter ROM",
167 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
169 .name
= "Adapter ROM",
172 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
174 .name
= "Adapter ROM",
177 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
179 .name
= "Adapter ROM",
182 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
184 .name
= "Adapter ROM",
187 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
189 .name
= "Adapter ROM",
192 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
/* Number of entries in adapter_rom_resources[]: total array size divided
   by the size of one element. */
195 #define ADAPTER_ROM_RESOURCES \
196 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
198 static struct resource video_rom_resource
= {
202 .flags
= IORESOURCE_BUSY
| IORESOURCE_READONLY
| IORESOURCE_MEM
205 static struct resource video_ram_resource
= {
206 .name
= "Video RAM area",
209 .flags
= IORESOURCE_BUSY
| IORESOURCE_MEM
212 static struct resource standard_io_resources
[] = { {
216 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
221 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
226 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
231 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
236 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
238 .name
= "dma page reg",
241 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
246 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
251 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
256 .flags
= IORESOURCE_BUSY
| IORESOURCE_IO
/* Number of entries in standard_io_resources[]: total array size divided
   by the size of one element. */
259 #define STANDARD_IO_RESOURCES \
260 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Non-zero when the unsigned short read from address x equals 0xaa55.
   x is cast to (unsigned short *) and dereferenced, so it must point at
   readable memory of at least that size. */
262 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
264 static int __init
romchecksum(unsigned char *rom
, unsigned long length
)
266 unsigned char *p
, sum
= 0;
268 for (p
= rom
; p
< rom
+ length
; p
++)
273 static void __init
probe_roms(void)
275 unsigned long start
, length
, upper
;
280 upper
= adapter_rom_resources
[0].start
;
281 for (start
= video_rom_resource
.start
; start
< upper
; start
+= 2048) {
282 rom
= isa_bus_to_virt(start
);
283 if (!romsignature(rom
))
286 video_rom_resource
.start
= start
;
288 /* 0 < length <= 0x7f * 512, historically */
289 length
= rom
[2] * 512;
291 /* if checksum okay, trust length byte */
292 if (length
&& romchecksum(rom
, length
))
293 video_rom_resource
.end
= start
+ length
- 1;
295 request_resource(&iomem_resource
, &video_rom_resource
);
299 start
= (video_rom_resource
.end
+ 1 + 2047) & ~2047UL;
304 request_resource(&iomem_resource
, &system_rom_resource
);
305 upper
= system_rom_resource
.start
;
307 /* check for extension rom (ignore length byte!) */
308 rom
= isa_bus_to_virt(extension_rom_resource
.start
);
309 if (romsignature(rom
)) {
310 length
= extension_rom_resource
.end
- extension_rom_resource
.start
+ 1;
311 if (romchecksum(rom
, length
)) {
312 request_resource(&iomem_resource
, &extension_rom_resource
);
313 upper
= extension_rom_resource
.start
;
317 /* check for adapter roms on 2k boundaries */
318 for (i
= 0; i
< ADAPTER_ROM_RESOURCES
&& start
< upper
; start
+= 2048) {
319 rom
= isa_bus_to_virt(start
);
320 if (!romsignature(rom
))
323 /* 0 < length <= 0x7f * 512, historically */
324 length
= rom
[2] * 512;
326 /* but accept any length that fits if checksum okay */
327 if (!length
|| start
+ length
> upper
|| !romchecksum(rom
, length
))
330 adapter_rom_resources
[i
].start
= start
;
331 adapter_rom_resources
[i
].end
= start
+ length
- 1;
332 request_resource(&iomem_resource
, &adapter_rom_resources
[i
]);
334 start
= adapter_rom_resources
[i
++].end
& ~2047UL;
338 static void __init
limit_regions(unsigned long long size
)
340 unsigned long long current_addr
= 0;
344 for (i
= 0; i
< memmap
.nr_map
; i
++) {
345 current_addr
= memmap
.map
[i
].phys_addr
+
346 (memmap
.map
[i
].num_pages
<< 12);
347 if (memmap
.map
[i
].type
== EFI_CONVENTIONAL_MEMORY
) {
348 if (current_addr
>= size
) {
349 memmap
.map
[i
].num_pages
-=
350 (((current_addr
-size
) + PAGE_SIZE
-1) >> PAGE_SHIFT
);
351 memmap
.nr_map
= i
+ 1;
357 for (i
= 0; i
< e820
.nr_map
; i
++) {
358 if (e820
.map
[i
].type
== E820_RAM
) {
359 current_addr
= e820
.map
[i
].addr
+ e820
.map
[i
].size
;
360 if (current_addr
>= size
) {
361 e820
.map
[i
].size
-= current_addr
-size
;
369 static void __init
add_memory_region(unsigned long long start
,
370 unsigned long long size
, int type
)
378 printk(KERN_ERR
"Ooops! Too many entries in the memory map!\n");
382 e820
.map
[x
].addr
= start
;
383 e820
.map
[x
].size
= size
;
384 e820
.map
[x
].type
= type
;
387 } /* add_memory_region */
391 static void __init
print_memory_map(char *who
)
395 for (i
= 0; i
< e820
.nr_map
; i
++) {
396 printk(" %s: %016Lx - %016Lx ", who
,
398 e820
.map
[i
].addr
+ e820
.map
[i
].size
);
399 switch (e820
.map
[i
].type
) {
400 case E820_RAM
: printk("(usable)\n");
403 printk("(reserved)\n");
406 printk("(ACPI data)\n");
409 printk("(ACPI NVS)\n");
411 default: printk("type %lu\n", e820
.map
[i
].type
);
418 * Sanitize the BIOS e820 map.
420 * Some e820 responses include overlapping entries. The following
421 * replaces the original e820 map with a new one, removing overlaps.
424 struct change_member
{
425 struct e820entry
*pbios
; /* pointer to original bios entry */
426 unsigned long long addr
; /* address for this change point */
428 static struct change_member change_point_list
[2*E820MAX
] __initdata
;
429 static struct change_member
*change_point
[2*E820MAX
] __initdata
;
430 static struct e820entry
*overlap_list
[E820MAX
] __initdata
;
431 static struct e820entry new_bios
[E820MAX
] __initdata
;
433 static int __init
sanitize_e820_map(struct e820entry
* biosmap
, char * pnr_map
)
435 struct change_member
*change_tmp
;
436 unsigned long current_type
, last_type
;
437 unsigned long long last_addr
;
438 int chgidx
, still_changing
;
441 int old_nr
, new_nr
, chg_nr
;
445 Visually we're performing the following (1,2,3,4 = memory types)...
447 Sample memory map (w/overlaps):
448 ____22__________________
449 ______________________4_
450 ____1111________________
451 _44_____________________
452 11111111________________
453 ____________________33__
454 ___________44___________
455 __________33333_________
456 ______________22________
457 ___________________2222_
458 _________111111111______
459 _____________________11_
460 _________________4______
462 Sanitized equivalent (no overlap):
463 1_______________________
464 _44_____________________
465 ___1____________________
466 ____22__________________
467 ______11________________
468 _________1______________
469 __________3_____________
470 ___________44___________
471 _____________33_________
472 _______________2________
473 ________________1_______
474 _________________4______
475 ___________________2____
476 ____________________33__
477 ______________________4_
480 /* if there's only one memory region, don't bother */
486 /* bail out if we find any unreasonable addresses in bios map */
487 for (i
=0; i
<old_nr
; i
++)
488 if (biosmap
[i
].addr
+ biosmap
[i
].size
< biosmap
[i
].addr
)
491 /* create pointers for initial change-point information (for sorting) */
492 for (i
=0; i
< 2*old_nr
; i
++)
493 change_point
[i
] = &change_point_list
[i
];
495 /* record all known change-points (starting and ending addresses),
496 omitting those that are for empty memory regions */
498 for (i
=0; i
< old_nr
; i
++) {
499 if (biosmap
[i
].size
!= 0) {
500 change_point
[chgidx
]->addr
= biosmap
[i
].addr
;
501 change_point
[chgidx
++]->pbios
= &biosmap
[i
];
502 change_point
[chgidx
]->addr
= biosmap
[i
].addr
+ biosmap
[i
].size
;
503 change_point
[chgidx
++]->pbios
= &biosmap
[i
];
506 chg_nr
= chgidx
; /* true number of change-points */
508 /* sort change-point list by memory addresses (low -> high) */
510 while (still_changing
) {
512 for (i
=1; i
< chg_nr
; i
++) {
513 /* if <current_addr> < <last_addr>, swap */
514 /* or, if current=<start_addr> & last=<end_addr>, swap */
515 if ((change_point
[i
]->addr
< change_point
[i
-1]->addr
) ||
516 ((change_point
[i
]->addr
== change_point
[i
-1]->addr
) &&
517 (change_point
[i
]->addr
== change_point
[i
]->pbios
->addr
) &&
518 (change_point
[i
-1]->addr
!= change_point
[i
-1]->pbios
->addr
))
521 change_tmp
= change_point
[i
];
522 change_point
[i
] = change_point
[i
-1];
523 change_point
[i
-1] = change_tmp
;
529 /* create a new bios memory map, removing overlaps */
530 overlap_entries
=0; /* number of entries in the overlap table */
531 new_bios_entry
=0; /* index for creating new bios map entries */
532 last_type
= 0; /* start with undefined memory type */
533 last_addr
= 0; /* start with 0 as last starting address */
534 /* loop through change-points, determining effect on the new bios map */
535 for (chgidx
=0; chgidx
< chg_nr
; chgidx
++)
537 /* keep track of all overlapping bios entries */
538 if (change_point
[chgidx
]->addr
== change_point
[chgidx
]->pbios
->addr
)
540 /* add map entry to overlap list (> 1 entry implies an overlap) */
541 overlap_list
[overlap_entries
++]=change_point
[chgidx
]->pbios
;
545 /* remove entry from list (order independent, so swap with last) */
546 for (i
=0; i
<overlap_entries
; i
++)
548 if (overlap_list
[i
] == change_point
[chgidx
]->pbios
)
549 overlap_list
[i
] = overlap_list
[overlap_entries
-1];
553 /* if there are overlapping entries, decide which "type" to use */
554 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
556 for (i
=0; i
<overlap_entries
; i
++)
557 if (overlap_list
[i
]->type
> current_type
)
558 current_type
= overlap_list
[i
]->type
;
559 /* continue building up new bios map based on this information */
560 if (current_type
!= last_type
) {
561 if (last_type
!= 0) {
562 new_bios
[new_bios_entry
].size
=
563 change_point
[chgidx
]->addr
- last_addr
;
564 /* move forward only if the new size was non-zero */
565 if (new_bios
[new_bios_entry
].size
!= 0)
566 if (++new_bios_entry
>= E820MAX
)
567 break; /* no more space left for new bios entries */
569 if (current_type
!= 0) {
570 new_bios
[new_bios_entry
].addr
= change_point
[chgidx
]->addr
;
571 new_bios
[new_bios_entry
].type
= current_type
;
572 last_addr
=change_point
[chgidx
]->addr
;
574 last_type
= current_type
;
577 new_nr
= new_bios_entry
; /* retain count for new bios entries */
579 /* copy new bios mapping into original location */
580 memcpy(biosmap
, new_bios
, new_nr
*sizeof(struct e820entry
));
587 * Copy the BIOS e820 map into a safe place.
589 * Sanity-check it while we're at it..
591 * If we're lucky and live on a modern system, the setup code
592 * will have given us a memory map that we can use to properly
593 * set up memory. If we aren't, we'll fake a memory map.
595 * We check to see that the memory map contains at least 2 elements
596 * before we'll use it, because the detection code in setup.S may
597 * not be perfect and most every PC known to man has two memory
598 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
599 * thinkpad 560x, for example, does not cooperate with the memory
602 static int __init
copy_e820_map(struct e820entry
* biosmap
, int nr_map
)
604 /* Only one memory region (or negative)? Ignore it */
609 unsigned long long start
= biosmap
->addr
;
610 unsigned long long size
= biosmap
->size
;
611 unsigned long long end
= start
+ size
;
612 unsigned long type
= biosmap
->type
;
614 /* Overflow in 64 bits? Ignore the memory map. */
619 * Some BIOSes claim RAM in the 640k - 1M region.
620 * Not right. Fix it up.
622 if (type
== E820_RAM
) {
623 if (start
< 0x100000ULL
&& end
> 0xA0000ULL
) {
624 if (start
< 0xA0000ULL
)
625 add_memory_region(start
, 0xA0000ULL
-start
, type
);
626 if (end
<= 0x100000ULL
)
632 add_memory_region(start
, size
, type
);
633 } while (biosmap
++,--nr_map
);
637 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
639 #ifdef CONFIG_EDD_MODULE
643 * copy_edd() - Copy the BIOS EDD information
644 * from boot_params into a safe place.
647 static inline void copy_edd(void)
649 memcpy(edd
.mbr_signature
, EDD_MBR_SIGNATURE
, sizeof(edd
.mbr_signature
));
650 memcpy(edd
.edd_info
, EDD_BUF
, sizeof(edd
.edd_info
));
651 edd
.mbr_signature_nr
= EDD_MBR_SIG_NR
;
652 edd
.edd_info_nr
= EDD_NR
;
655 static inline void copy_edd(void)
661 * Do NOT EVER look at the BIOS memory size location.
662 * It does not work on many machines.
664 #define LOWMEMSIZE() (0x9f000)
666 static void __init
parse_cmdline_early (char ** cmdline_p
)
668 char c
= ' ', *to
= command_line
, *from
= saved_command_line
;
672 /* Save unparsed command line copy for /proc/cmdline */
673 saved_command_line
[COMMAND_LINE_SIZE
-1] = '\0';
679 * "mem=nopentium" disables the 4MB page tables.
680 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
681 * to <mem>, overriding the bios size.
682 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
683 * <start> to <start>+<mem>, overriding the bios size.
685 * HPA tells me bootloaders need to parse mem=, so no new
686 * option should be mem= [also see Documentation/i386/boot.txt]
688 if (!memcmp(from
, "mem=", 4)) {
689 if (to
!= command_line
)
691 if (!memcmp(from
+4, "nopentium", 9)) {
693 clear_bit(X86_FEATURE_PSE
, boot_cpu_data
.x86_capability
);
696 /* If the user specifies memory size, we
697 * limit the BIOS-provided memory map to
698 * that size. exactmap can be used to specify
699 * the exact map. mem=number can be used to
700 * trim the existing memory map.
702 unsigned long long mem_size
;
704 mem_size
= memparse(from
+4, &from
);
705 limit_regions(mem_size
);
710 else if (!memcmp(from
, "memmap=", 7)) {
711 if (to
!= command_line
)
713 if (!memcmp(from
+7, "exactmap", 8)) {
718 /* If the user specifies memory size, we
719 * limit the BIOS-provided memory map to
720 * that size. exactmap can be used to specify
721 * the exact map. mem=number can be used to
722 * trim the existing memory map.
724 unsigned long long start_at
, mem_size
;
726 mem_size
= memparse(from
+7, &from
);
728 start_at
= memparse(from
+1, &from
);
729 add_memory_region(start_at
, mem_size
, E820_RAM
);
730 } else if (*from
== '#') {
731 start_at
= memparse(from
+1, &from
);
732 add_memory_region(start_at
, mem_size
, E820_ACPI
);
733 } else if (*from
== '$') {
734 start_at
= memparse(from
+1, &from
);
735 add_memory_region(start_at
, mem_size
, E820_RESERVED
);
737 limit_regions(mem_size
);
743 else if (!memcmp(from
, "noexec=", 7))
744 noexec_setup(from
+ 7);
747 #ifdef CONFIG_X86_SMP
749 * If the BIOS enumerates physical processors before logical,
750 * maxcpus=N at enumeration-time can be used to disable HT.
752 else if (!memcmp(from
, "maxcpus=", 8)) {
753 extern unsigned int maxcpus
;
755 maxcpus
= simple_strtoul(from
+ 8, NULL
, 0);
759 #ifdef CONFIG_ACPI_BOOT
760 /* "acpi=off" disables both ACPI table parsing and interpreter */
761 else if (!memcmp(from
, "acpi=off", 8)) {
765 /* acpi=force to over-ride black-list */
766 else if (!memcmp(from
, "acpi=force", 10)) {
772 /* acpi=strict disables out-of-spec workarounds */
773 else if (!memcmp(from
, "acpi=strict", 11)) {
777 /* Limit ACPI just to boot-time to enable HT */
778 else if (!memcmp(from
, "acpi=ht", 7)) {
784 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
785 else if (!memcmp(from
, "pci=noacpi", 10)) {
788 /* "acpi=noirq" disables ACPI interrupt routing */
789 else if (!memcmp(from
, "acpi=noirq", 10)) {
793 else if (!memcmp(from
, "acpi_sci=edge", 13))
794 acpi_sci_flags
.trigger
= 1;
796 else if (!memcmp(from
, "acpi_sci=level", 14))
797 acpi_sci_flags
.trigger
= 3;
799 else if (!memcmp(from
, "acpi_sci=high", 13))
800 acpi_sci_flags
.polarity
= 1;
802 else if (!memcmp(from
, "acpi_sci=low", 12))
803 acpi_sci_flags
.polarity
= 3;
805 #ifdef CONFIG_X86_IO_APIC
806 else if (!memcmp(from
, "acpi_skip_timer_override", 24))
807 acpi_skip_timer_override
= 1;
810 #ifdef CONFIG_X86_LOCAL_APIC
811 /* disable IO-APIC */
812 else if (!memcmp(from
, "noapic", 6))
813 disable_ioapic_setup();
814 #endif /* CONFIG_X86_LOCAL_APIC */
815 #endif /* CONFIG_ACPI_BOOT */
818 * highmem=size forces highmem to be exactly 'size' bytes.
819 * This works even on boxes that have no highmem otherwise.
820 * This also works to reduce highmem size on bigger boxes.
822 else if (!memcmp(from
, "highmem=", 8))
823 highmem_pages
= memparse(from
+8, &from
) >> PAGE_SHIFT
;
826 * vmalloc=size forces the vmalloc area to be exactly 'size'
827 * bytes. This can be used to increase (or decrease) the
828 * vmalloc area - the default is 128m.
830 else if (!memcmp(from
, "vmalloc=", 8))
831 __VMALLOC_RESERVE
= memparse(from
+8, &from
);
837 if (COMMAND_LINE_SIZE
<= ++len
)
842 *cmdline_p
= command_line
;
844 printk(KERN_INFO
"user-defined physical RAM map:\n");
845 print_memory_map("user");
850 * Callback for efi_memmap_walk.
853 efi_find_max_pfn(unsigned long start
, unsigned long end
, void *arg
)
855 unsigned long *max_pfn
= arg
, pfn
;
858 pfn
= PFN_UP(end
-1);
867 * Find the highest page frame number we have available
869 void __init
find_max_pfn(void)
875 efi_memmap_walk(efi_find_max_pfn
, &max_pfn
);
879 for (i
= 0; i
< e820
.nr_map
; i
++) {
880 unsigned long start
, end
;
882 if (e820
.map
[i
].type
!= E820_RAM
)
884 start
= PFN_UP(e820
.map
[i
].addr
);
885 end
= PFN_DOWN(e820
.map
[i
].addr
+ e820
.map
[i
].size
);
894 * Determine low and high memory ranges:
896 unsigned long __init
find_max_low_pfn(void)
898 unsigned long max_low_pfn
;
900 max_low_pfn
= max_pfn
;
901 if (max_low_pfn
> MAXMEM_PFN
) {
902 if (highmem_pages
== -1)
903 highmem_pages
= max_pfn
- MAXMEM_PFN
;
904 if (highmem_pages
+ MAXMEM_PFN
< max_pfn
)
905 max_pfn
= MAXMEM_PFN
+ highmem_pages
;
906 if (highmem_pages
+ MAXMEM_PFN
> max_pfn
) {
907 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn
- MAXMEM_PFN
), pages_to_mb(highmem_pages
));
910 max_low_pfn
= MAXMEM_PFN
;
911 #ifndef CONFIG_HIGHMEM
912 /* Maximum memory usable is what is directly addressable */
913 printk(KERN_WARNING
"Warning only %ldMB will be used.\n",
915 if (max_pfn
> MAX_NONPAE_PFN
)
916 printk(KERN_WARNING
"Use a PAE enabled kernel.\n");
918 printk(KERN_WARNING
"Use a HIGHMEM enabled kernel.\n");
919 max_pfn
= MAXMEM_PFN
;
920 #else /* !CONFIG_HIGHMEM */
921 #ifndef CONFIG_X86_PAE
922 if (max_pfn
> MAX_NONPAE_PFN
) {
923 max_pfn
= MAX_NONPAE_PFN
;
924 printk(KERN_WARNING
"Warning only 4GB will be used.\n");
925 printk(KERN_WARNING
"Use a PAE enabled kernel.\n");
927 #endif /* !CONFIG_X86_PAE */
928 #endif /* !CONFIG_HIGHMEM */
930 if (highmem_pages
== -1)
932 #ifdef CONFIG_HIGHMEM
933 if (highmem_pages
>= max_pfn
) {
934 printk(KERN_ERR
"highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages
), pages_to_mb(max_pfn
));
938 if (max_low_pfn
-highmem_pages
< 64*1024*1024/PAGE_SIZE
){
939 printk(KERN_ERR
"highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages
));
942 max_low_pfn
-= highmem_pages
;
946 printk(KERN_ERR
"ignoring highmem size on non-highmem kernel!\n");
953 * Free all available memory for boot time allocation. Used
954 * as a callback function by efi_memmap_walk()
958 free_available_memory(unsigned long start
, unsigned long end
, void *arg
)
960 /* check max_low_pfn */
961 if (start
>= ((max_low_pfn
+ 1) << PAGE_SHIFT
))
963 if (end
>= ((max_low_pfn
+ 1) << PAGE_SHIFT
))
964 end
= (max_low_pfn
+ 1) << PAGE_SHIFT
;
966 free_bootmem(start
, end
- start
);
971 * Register fully available low RAM pages with the bootmem allocator.
973 static void __init
register_bootmem_low_pages(unsigned long max_low_pfn
)
978 efi_memmap_walk(free_available_memory
, NULL
);
981 for (i
= 0; i
< e820
.nr_map
; i
++) {
982 unsigned long curr_pfn
, last_pfn
, size
;
984 * Reserve usable low memory
986 if (e820
.map
[i
].type
!= E820_RAM
)
989 * We are rounding up the start address of usable memory:
991 curr_pfn
= PFN_UP(e820
.map
[i
].addr
);
992 if (curr_pfn
>= max_low_pfn
)
995 * ... and at the end of the usable range downwards:
997 last_pfn
= PFN_DOWN(e820
.map
[i
].addr
+ e820
.map
[i
].size
);
999 if (last_pfn
> max_low_pfn
)
1000 last_pfn
= max_low_pfn
;
1003 * .. finally, did all the rounding and playing
1004 * around just make the area go away?
1006 if (last_pfn
<= curr_pfn
)
1009 size
= last_pfn
- curr_pfn
;
1010 free_bootmem(PFN_PHYS(curr_pfn
), PFN_PHYS(size
));
1015 * workaround for Dell systems that neglect to reserve EBDA
1017 static void __init
reserve_ebda_region(void)
1020 addr
= get_bios_ebda();
1022 reserve_bootmem(addr
, PAGE_SIZE
);
1025 #ifndef CONFIG_DISCONTIGMEM
1026 void __init
setup_bootmem_allocator(void);
1027 static unsigned long __init
setup_memory(void)
1030 * partially used pages are not usable - thus
1031 * we are rounding upwards:
1033 min_low_pfn
= PFN_UP(init_pg_tables_end
);
1037 max_low_pfn
= find_max_low_pfn();
1039 #ifdef CONFIG_HIGHMEM
1040 highstart_pfn
= highend_pfn
= max_pfn
;
1041 if (max_pfn
> max_low_pfn
) {
1042 highstart_pfn
= max_low_pfn
;
1044 printk(KERN_NOTICE
"%ldMB HIGHMEM available.\n",
1045 pages_to_mb(highend_pfn
- highstart_pfn
));
1047 printk(KERN_NOTICE
"%ldMB LOWMEM available.\n",
1048 pages_to_mb(max_low_pfn
));
1050 setup_bootmem_allocator();
1055 void __init
zone_sizes_init(void)
1057 unsigned long zones_size
[MAX_NR_ZONES
] = {0, 0, 0};
1058 unsigned int max_dma
, low
;
1060 max_dma
= virt_to_phys((char *)MAX_DMA_ADDRESS
) >> PAGE_SHIFT
;
1064 zones_size
[ZONE_DMA
] = low
;
1066 zones_size
[ZONE_DMA
] = max_dma
;
1067 zones_size
[ZONE_NORMAL
] = low
- max_dma
;
1068 #ifdef CONFIG_HIGHMEM
1069 zones_size
[ZONE_HIGHMEM
] = highend_pfn
- low
;
1072 free_area_init(zones_size
);
1075 extern unsigned long setup_memory(void);
1076 extern void zone_sizes_init(void);
1077 #endif /* !CONFIG_DISCONTIGMEM */
1079 void __init
setup_bootmem_allocator(void)
1081 unsigned long bootmap_size
;
1083 * Initialize the boot-time allocator (with low memory only):
1085 bootmap_size
= init_bootmem(min_low_pfn
, max_low_pfn
);
1087 register_bootmem_low_pages(max_low_pfn
);
1090 * Reserve the bootmem bitmap itself as well. We do this in two
1091 * steps (first step was init_bootmem()) because this catches
1092 * the (very unlikely) case of us accidentally initializing the
1093 * bootmem allocator with an invalid RAM area.
1095 reserve_bootmem(HIGH_MEMORY
, (PFN_PHYS(min_low_pfn
) +
1096 bootmap_size
+ PAGE_SIZE
-1) - (HIGH_MEMORY
));
1099 * reserve physical page 0 - it's a special BIOS page on many boxes,
1100 * enabling clean reboots, SMP operation, laptop functions.
1102 reserve_bootmem(0, PAGE_SIZE
);
1104 /* reserve EBDA region, it's a 4K region */
1105 reserve_ebda_region();
1107 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1108 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1109 unless you have no PS/2 mouse plugged in. */
1110 if (boot_cpu_data
.x86_vendor
== X86_VENDOR_AMD
&&
1111 boot_cpu_data
.x86
== 6)
1112 reserve_bootmem(0xa0000 - 4096, 4096);
1116 * But first pinch a few for the stack/trampoline stuff
1117 * FIXME: Don't need the extra page at 4K, but need to fix
1118 * trampoline before removing it. (see the GDT stuff)
1120 reserve_bootmem(PAGE_SIZE
, PAGE_SIZE
);
1122 #ifdef CONFIG_ACPI_SLEEP
1124 * Reserve low memory region for sleep support.
1126 acpi_reserve_bootmem();
1128 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1130 * Find and reserve possible boot-time SMP configuration:
1135 #ifdef CONFIG_BLK_DEV_INITRD
1136 if (LOADER_TYPE
&& INITRD_START
) {
1137 if (INITRD_START
+ INITRD_SIZE
<= (max_low_pfn
<< PAGE_SHIFT
)) {
1138 reserve_bootmem(INITRD_START
, INITRD_SIZE
);
1140 INITRD_START
? INITRD_START
+ PAGE_OFFSET
: 0;
1141 initrd_end
= initrd_start
+INITRD_SIZE
;
1144 printk(KERN_ERR
"initrd extends beyond end of memory "
1145 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1146 INITRD_START
+ INITRD_SIZE
,
1147 max_low_pfn
<< PAGE_SHIFT
);
1155 * The node 0 pgdat is initialized before all of these because
1156 * it's needed for bootmem. node>0 pgdats have their virtual
1157 * space allocated before the pagetables are in place to access
1158 * them, so they can't be cleared then.
1160 * This should all compile down to nothing when NUMA is off.
1162 void __init
remapped_pgdat_init(void)
1166 for_each_online_node(nid
) {
1168 memset(NODE_DATA(nid
), 0, sizeof(struct pglist_data
));
1173 * Request address space for all standard RAM and ROM resources
1174 * and also for regions reported as reserved by the e820.
1177 legacy_init_iomem_resources(struct resource
*code_resource
, struct resource
*data_resource
)
1182 for (i
= 0; i
< e820
.nr_map
; i
++) {
1183 struct resource
*res
;
1184 if (e820
.map
[i
].addr
+ e820
.map
[i
].size
> 0x100000000ULL
)
1186 res
= alloc_bootmem_low(sizeof(struct resource
));
1187 switch (e820
.map
[i
].type
) {
1188 case E820_RAM
: res
->name
= "System RAM"; break;
1189 case E820_ACPI
: res
->name
= "ACPI Tables"; break;
1190 case E820_NVS
: res
->name
= "ACPI Non-volatile Storage"; break;
1191 default: res
->name
= "reserved";
1193 res
->start
= e820
.map
[i
].addr
;
1194 res
->end
= res
->start
+ e820
.map
[i
].size
- 1;
1195 res
->flags
= IORESOURCE_MEM
| IORESOURCE_BUSY
;
1196 request_resource(&iomem_resource
, res
);
1197 if (e820
.map
[i
].type
== E820_RAM
) {
1199 * We don't know which RAM region contains kernel data,
1200 * so we try it repeatedly and let the resource manager
1203 request_resource(res
, code_resource
);
1204 request_resource(res
, data_resource
);
1210 * Request address space for all standard resources
1212 static void __init
register_memory(void)
1214 unsigned long gapstart
, gapsize
;
1215 unsigned long long last
;
1219 efi_initialize_iomem_resources(&code_resource
, &data_resource
);
1221 legacy_init_iomem_resources(&code_resource
, &data_resource
);
1223 /* EFI systems may still have VGA */
1224 request_resource(&iomem_resource
, &video_ram_resource
);
1226 /* request I/O space for devices used on all i[345]86 PCs */
1227 for (i
= 0; i
< STANDARD_IO_RESOURCES
; i
++)
1228 request_resource(&ioport_resource
, &standard_io_resources
[i
]);
1231 * Search for the biggest gap in the low 32 bits of the e820
1234 last
= 0x100000000ull
;
1235 gapstart
= 0x10000000;
1239 unsigned long long start
= e820
.map
[i
].addr
;
1240 unsigned long long end
= start
+ e820
.map
[i
].size
;
1243 * Since "last" is at most 4GB, we know we'll
1244 * fit in 32 bits if this condition is true
1247 unsigned long gap
= last
- end
;
1249 if (gap
> gapsize
) {
1259 * Start allocating dynamic PCI memory a bit into the gap,
1260 * aligned up to the nearest megabyte.
1262 * Question: should we try to pad it up a bit (do something
1263 * like " + (gapsize >> 3)" in there too?). We now have the
1266 pci_mem_start
= (gapstart
+ 0xfffff) & ~0xfffff;
1268 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1269 pci_mem_start
, gapstart
, gapsize
);
1272 /* Use inline assembly to define this because the nops are defined
1273 as inline assembly strings in the include files and we cannot
1274 get them easily into strings. */
1275 asm("\t.data\nintelnops: "
1276 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1277 GENERIC_NOP7 GENERIC_NOP8
);
1278 asm("\t.data\nk8nops: "
1279 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1281 asm("\t.data\nk7nops: "
1282 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1285 extern unsigned char intelnops
[], k8nops
[], k7nops
[];
1286 static unsigned char *intel_nops
[ASM_NOP_MAX
+1] = {
1291 intelnops
+ 1 + 2 + 3,
1292 intelnops
+ 1 + 2 + 3 + 4,
1293 intelnops
+ 1 + 2 + 3 + 4 + 5,
1294 intelnops
+ 1 + 2 + 3 + 4 + 5 + 6,
1295 intelnops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1297 static unsigned char *k8_nops
[ASM_NOP_MAX
+1] = {
1303 k8nops
+ 1 + 2 + 3 + 4,
1304 k8nops
+ 1 + 2 + 3 + 4 + 5,
1305 k8nops
+ 1 + 2 + 3 + 4 + 5 + 6,
1306 k8nops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1308 static unsigned char *k7_nops
[ASM_NOP_MAX
+1] = {
1314 k7nops
+ 1 + 2 + 3 + 4,
1315 k7nops
+ 1 + 2 + 3 + 4 + 5,
1316 k7nops
+ 1 + 2 + 3 + 4 + 5 + 6,
1317 k7nops
+ 1 + 2 + 3 + 4 + 5 + 6 + 7,
1321 unsigned char **noptable
;
1323 { X86_FEATURE_K8
, k8_nops
},
1324 { X86_FEATURE_K7
, k7_nops
},
1328 /* Replace instructions with better alternatives for this CPU type.
1330 This runs before SMP is initialized to avoid SMP problems with
1331 self modifying code. This implies that assymetric systems where
1332 APs have less capabilities than the boot processor are not handled.
1333 In this case boot with "noreplacement". */
1334 void apply_alternatives(void *start
, void *end
)
1336 struct alt_instr
*a
;
1338 unsigned char **noptable
= intel_nops
;
1339 for (i
= 0; noptypes
[i
].cpuid
>= 0; i
++) {
1340 if (boot_cpu_has(noptypes
[i
].cpuid
)) {
1341 noptable
= noptypes
[i
].noptable
;
1345 for (a
= start
; (void *)a
< end
; a
++) {
1346 if (!boot_cpu_has(a
->cpuid
))
1348 BUG_ON(a
->replacementlen
> a
->instrlen
);
1349 memcpy(a
->instr
, a
->replacement
, a
->replacementlen
);
1350 diff
= a
->instrlen
- a
->replacementlen
;
1351 /* Pad the rest with nops */
1352 for (i
= a
->replacementlen
; diff
> 0; diff
-= k
, i
+= k
) {
1354 if (k
> ASM_NOP_MAX
)
1356 memcpy(a
->instr
+ i
, noptable
[k
], k
);
1361 static int no_replacement __initdata
= 0;
1363 void __init
alternative_instructions(void)
1365 extern struct alt_instr __alt_instructions
[], __alt_instructions_end
[];
1368 apply_alternatives(__alt_instructions
, __alt_instructions_end
);
1371 static int __init
noreplacement_setup(char *s
)
1377 __setup("noreplacement", noreplacement_setup
);
/*
 * Forward declaration.  setup_arch() passes the returned string to
 * print_memory_map().
 */
1379 static char * __init
machine_specific_memory_setup(void);
/*
 * set_mca_bus() is called by setup_arch() with bit 1 (mask 0x2) of
 * SYS_DESC_TABLE.table[3].
 * NOTE(review): two definitions follow; presumably a preprocessor
 * conditional that is not visible here selects one of them -- confirm.
 */
1382 static void set_mca_bus(int x
)
/* Variant with an empty body: the call in setup_arch() does nothing. */
1387 static void set_mca_bus(int x
) { }
1391 * Determine if we were loaded by an EFI loader. If so, then we have also been
1392 * passed the efi memmap, systab, etc., so we should use these data structures
1393 * for initialization. Note, the efi init code path is determined by the
1394 * global efi_enabled. This allows the same kernel image to be used on existing
1395 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch - architecture-specific boot-time setup.
 * @cmdline_p: handed to parse_cmdline_early(); *cmdline_p is later
 *             searched for "earlyprintk=" and passed to
 *             generic_apic_probe().
 *
 * Steps visible in this function, in order:
 *  - copy new_cpu_data into boot_cpu_data, then run
 *    pre_setup_arch_hook();
 *  - test for the EFI loader (LOADER_TYPE == 0x50 together with a
 *    non-zero EFI_SYSTAB);
 *  - copy boot parameters into kernel globals: ROOT_DEV, drive_info,
 *    screen_info, edid_info, apm_info.bios, ist_info, saved_videomode
 *    and bootloader_type; when SYS_DESC_TABLE.length is non-zero also
 *    the MCA flag, machine_id, machine_submodel_id and BIOS_revision;
 *  - with CONFIG_BLK_DEV_RAM, decode RAMDISK_FLAGS into rd_image_start,
 *    rd_prompt and rd_doload;
 *  - print the memory map named by machine_specific_memory_setup();
 *  - clear MS_RDONLY in root_mountflags unless MOUNT_ROOT_RDONLY is set;
 *  - record the kernel text/data bounds in init_mm and in
 *    code_resource/data_resource;
 *  - parse_cmdline_early(), setup_memory() (result kept in
 *    max_low_pfn), smp_alloc_memory(), remapped_pgdat_init();
 *  - optional early printk setup, generic APIC probe and ACPI boot
 *    table initialisation;
 *  - select the console switch: vga_con or dummy_con.
 *
 * NOTE(review): several statements of this function (for example the
 * bodies of some conditionals) are not present in this copy -- check
 * against the full file before relying on the list above.
 */
1397 void __init
setup_arch(char **cmdline_p
)
1399 unsigned long max_low_pfn
;
1401 memcpy(&boot_cpu_data
, &new_cpu_data
, sizeof(new_cpu_data
));
1402 pre_setup_arch_hook();
1406 * FIXME: This isn't an official loader_type right
1407 * now but does currently work with elilo.
1408 * If we were configured as an EFI kernel, check to make
1409 * sure that we were loaded correctly from elilo and that
1410 * the system table is valid. If not, then initialize normally.
1413 if ((LOADER_TYPE
== 0x50) && EFI_SYSTAB
)
1417 ROOT_DEV
= old_decode_dev(ORIG_ROOT_DEV
);
1418 drive_info
= DRIVE_INFO
;
1419 screen_info
= SCREEN_INFO
;
1420 edid_info
= EDID_INFO
;
1421 apm_info
.bios
= APM_BIOS_INFO
;
1422 ist_info
= IST_INFO
;
1423 saved_videomode
= VIDEO_MODE
;
1424 if( SYS_DESC_TABLE
.length
!= 0 ) {
1425 set_mca_bus(SYS_DESC_TABLE
.table
[3] & 0x2);
1426 machine_id
= SYS_DESC_TABLE
.table
[0];
1427 machine_submodel_id
= SYS_DESC_TABLE
.table
[1];
1428 BIOS_revision
= SYS_DESC_TABLE
.table
[2];
1430 bootloader_type
= LOADER_TYPE
;
1432 #ifdef CONFIG_BLK_DEV_RAM
1433 rd_image_start
= RAMDISK_FLAGS
& RAMDISK_IMAGE_START_MASK
;
1434 rd_prompt
= ((RAMDISK_FLAGS
& RAMDISK_PROMPT_FLAG
) != 0);
1435 rd_doload
= ((RAMDISK_FLAGS
& RAMDISK_LOAD_FLAG
) != 0);
1441 printk(KERN_INFO
"BIOS-provided physical RAM map:\n");
1442 print_memory_map(machine_specific_memory_setup());
1447 if (!MOUNT_ROOT_RDONLY
)
1448 root_mountflags
&= ~MS_RDONLY
;
1449 init_mm
.start_code
= (unsigned long) _text
;
1450 init_mm
.end_code
= (unsigned long) _etext
;
1451 init_mm
.end_data
= (unsigned long) _edata
;
1452 init_mm
.brk
= init_pg_tables_end
+ PAGE_OFFSET
;
1454 code_resource
.start
= virt_to_phys(_text
);
1455 code_resource
.end
= virt_to_phys(_etext
)-1;
1456 data_resource
.start
= virt_to_phys(_etext
);
1457 data_resource
.end
= virt_to_phys(_edata
)-1;
1459 parse_cmdline_early(cmdline_p
);
1461 max_low_pfn
= setup_memory();
1464 * NOTE: before this point _nobody_ is allowed to allocate
1465 * any memory using the bootmem allocator. Although the
1466 * alloctor is now initialised only the first 8Mb of the kernel
1467 * virtual address space has been mapped. All allocations before
1468 * paging_init() has completed must use the alloc_bootmem_low_pages()
1469 * variant (which allocates DMA'able memory) and care must be taken
1470 * not to exceed the 8Mb limit.
1474 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1477 remapped_pgdat_init();
1481 * NOTE: at this point the bootmem allocator is fully available.
/*
 * Early console: look for "earlyprintk=" on the command line and pass
 * the match to setup_early_printk().
 */
1484 #ifdef CONFIG_EARLY_PRINTK
1486 char *s
= strstr(*cmdline_p
, "earlyprintk=");
1488 extern void setup_early_printk(char *);
1490 setup_early_printk(s
);
1491 printk("early console enabled\n");
/* Generic-architecture kernels probe the APIC driver from the command line. */
1499 #ifdef CONFIG_X86_GENERICARCH
1500 generic_apic_probe(*cmdline_p
);
1506 * Parse the ACPI tables for possible boot-time SMP configuration.
1508 acpi_boot_table_init();
1511 #ifdef CONFIG_X86_LOCAL_APIC
1512 if (smp_found_config
)
/*
 * Console selection: vga_con unless EFI is enabled and reports the
 * memory at 0xa0000 as EFI_CONVENTIONAL_MEMORY; dummy_con when only the
 * dummy console is configured.
 */
1519 #if defined(CONFIG_VGA_CONSOLE)
1520 if (!efi_enabled
|| (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY
))
1521 conswitchp
= &vga_con
;
1522 #elif defined(CONFIG_DUMMY_CONSOLE)
1523 conswitchp
= &dummy_con
;
1528 #include "setup_arch_post.h"
1532 * c-file-style:"k&r"