mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race
[linux/fpc-iii.git] / arch / ia64 / kernel / efi.c
blob9c09bf390ccea55fd1272d69e2445dcb44935327
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Extensible Firmware Interface
5 * Based on Extensible Firmware Interface Specification version 0.9
6 * April 30, 1999
8 * Copyright (C) 1999 VA Linux Systems
9 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
10 * Copyright (C) 1999-2003 Hewlett-Packard Co.
11 * David Mosberger-Tang <davidm@hpl.hp.com>
12 * Stephane Eranian <eranian@hpl.hp.com>
13 * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
14 * Bjorn Helgaas <bjorn.helgaas@hp.com>
16 * All EFI Runtime Services are not implemented yet as EFI only
17 * supports physical mode addressing on SoftSDV. This is to be fixed
18 * in a future version. --drummond 1999-07-20
20 * Implemented EFI runtime services and virtual mode calls. --davidm
22 * Goutham Rao: <goutham.rao@intel.com>
23 * Skip non-WB memory and ignore empty memory ranges.
25 #include <linux/module.h>
26 #include <linux/bootmem.h>
27 #include <linux/crash_dump.h>
28 #include <linux/kernel.h>
29 #include <linux/init.h>
30 #include <linux/types.h>
31 #include <linux/slab.h>
32 #include <linux/time.h>
33 #include <linux/efi.h>
34 #include <linux/kexec.h>
35 #include <linux/mm.h>
37 #include <asm/io.h>
38 #include <asm/kregs.h>
39 #include <asm/meminit.h>
40 #include <asm/pgtable.h>
41 #include <asm/processor.h>
42 #include <asm/mca.h>
43 #include <asm/setup.h>
44 #include <asm/tlbflush.h>
46 #define EFI_DEBUG 0
48 static __initdata unsigned long palo_phys;
50 static __initdata efi_config_table_type_t arch_tables[] = {
51 {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
52 {NULL_GUID, NULL, 0},
55 extern efi_status_t efi_call_phys (void *, ...);
57 static efi_runtime_services_t *runtime;
58 static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
60 #define efi_call_virt(f, args...) (*(f))(args)
/*
 * Emit prefix##_get_time(): invoke the firmware get_time service through
 * efi_call_##prefix, passing pointer arguments through adjust_arg().
 * A NULL capabilities pointer is forwarded as NULL without adjustment.
 * The call is bracketed by ia64_save/load_scratch_fpregs().
 */
#define STUB_GET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
{ \
	efi_time_cap_t *cap_arg = tc ? adjust_arg(tc) : NULL; \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_get_time_t *) __va(runtime->get_time), \
		adjust_arg(tm), cap_arg); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_set_time(): invoke the firmware set_time service through
 * efi_call_##prefix with the time pointer passed through adjust_arg().
 * The call is bracketed by ia64_save/load_scratch_fpregs().
 */
#define STUB_SET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_time (efi_time_t *tm) \
{ \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_set_time_t *) __va(runtime->set_time), \
		adjust_arg(tm)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_get_wakeup_time(): invoke the firmware get_wakeup_time
 * service through efi_call_##prefix; all three pointers go through
 * adjust_arg().  The call is bracketed by ia64_save/load_scratch_fpregs().
 */
#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, \
			  efi_time_t *tm) \
{ \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
		adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_set_wakeup_time(): invoke the firmware set_wakeup_time
 * service through efi_call_##prefix.  A NULL time pointer is forwarded
 * as NULL; otherwise it goes through adjust_arg().  The call is
 * bracketed by ia64_save/load_scratch_fpregs().
 */
#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
{ \
	efi_time_t *tm_arg = tm ? adjust_arg(tm) : NULL; \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
		enabled, tm_arg); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_get_variable(): invoke the firmware get_variable service
 * through efi_call_##prefix.  A NULL attribute pointer is forwarded as
 * NULL; every other pointer goes through adjust_arg().  The call is
 * bracketed by ia64_save/load_scratch_fpregs().
 */
#define STUB_GET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
		       unsigned long *data_size, void *data) \
{ \
	u32 *attr_arg = attr ? adjust_arg(attr) : NULL; \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_get_variable_t *) __va(runtime->get_variable), \
		adjust_arg(name), adjust_arg(vendor), attr_arg, \
		adjust_arg(data_size), adjust_arg(data)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_get_next_variable(): invoke the firmware
 * get_next_variable service through efi_call_##prefix; all pointers go
 * through adjust_arg().  The call is bracketed by
 * ia64_save/load_scratch_fpregs().
 */
#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \
			    efi_guid_t *vendor) \
{ \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_get_next_variable_t *) __va(runtime->get_next_variable), \
		adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_set_variable(): invoke the firmware set_variable service
 * through efi_call_##prefix.  Pointers go through adjust_arg(); attr and
 * data_size are passed by value.  The call is bracketed by
 * ia64_save/load_scratch_fpregs().
 */
#define STUB_SET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \
		       u32 attr, unsigned long data_size, \
		       void *data) \
{ \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_set_variable_t *) __va(runtime->set_variable), \
		adjust_arg(name), adjust_arg(vendor), attr, data_size, \
		adjust_arg(data)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_get_next_high_mono_count(): invoke the firmware
 * get_next_high_mono_count service through efi_call_##prefix with the
 * counter pointer passed through adjust_arg().  The call is bracketed
 * by ia64_save/load_scratch_fpregs().
 */
#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_high_mono_count (u32 *count) \
{ \
	struct ia64_fpreg fpregs[6]; \
	efi_status_t status; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	status = efi_call_##prefix( \
		(efi_get_next_high_mono_count_t *) \
		__va(runtime->get_next_high_mono_count), \
		adjust_arg(count)); \
	ia64_load_scratch_fpregs(fpregs); \
	return status; \
}
/*
 * Emit prefix##_reset_system(): invoke the firmware reset_system service
 * through efi_call_##prefix.  A NULL data pointer is forwarded as NULL;
 * otherwise it goes through adjust_arg().  The scratch FP registers are
 * reloaded afterwards in case the firmware call returns.
 */
#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
static void \
prefix##_reset_system (int reset_type, efi_status_t status, \
		       unsigned long data_size, efi_char16_t *data) \
{ \
	efi_char16_t *data_arg = data ? adjust_arg(data) : NULL; \
	struct ia64_fpreg fpregs[6]; \
	\
	ia64_save_scratch_fpregs(fpregs); \
	efi_call_##prefix( \
		(efi_reset_system_t *) __va(runtime->reset_system), \
		reset_type, status, data_size, data_arg); \
	/* should not return, but just in case... */ \
	ia64_load_scratch_fpregs(fpregs); \
}
215 #define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
217 STUB_GET_TIME(phys, phys_ptr)
218 STUB_SET_TIME(phys, phys_ptr)
219 STUB_GET_WAKEUP_TIME(phys, phys_ptr)
220 STUB_SET_WAKEUP_TIME(phys, phys_ptr)
221 STUB_GET_VARIABLE(phys, phys_ptr)
222 STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
223 STUB_SET_VARIABLE(phys, phys_ptr)
224 STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
225 STUB_RESET_SYSTEM(phys, phys_ptr)
227 #define id(arg) arg
229 STUB_GET_TIME(virt, id)
230 STUB_SET_TIME(virt, id)
231 STUB_GET_WAKEUP_TIME(virt, id)
232 STUB_SET_WAKEUP_TIME(virt, id)
233 STUB_GET_VARIABLE(virt, id)
234 STUB_GET_NEXT_VARIABLE(virt, id)
235 STUB_SET_VARIABLE(virt, id)
236 STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
237 STUB_RESET_SYSTEM(virt, id)
239 void
240 efi_gettimeofday (struct timespec64 *ts)
242 efi_time_t tm;
244 if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) {
245 memset(ts, 0, sizeof(*ts));
246 return;
249 ts->tv_sec = mktime64(tm.year, tm.month, tm.day,
250 tm.hour, tm.minute, tm.second);
251 ts->tv_nsec = tm.nanosecond;
254 static int
255 is_memory_available (efi_memory_desc_t *md)
257 if (!(md->attribute & EFI_MEMORY_WB))
258 return 0;
260 switch (md->type) {
261 case EFI_LOADER_CODE:
262 case EFI_LOADER_DATA:
263 case EFI_BOOT_SERVICES_CODE:
264 case EFI_BOOT_SERVICES_DATA:
265 case EFI_CONVENTIONAL_MEMORY:
266 return 1;
268 return 0;
271 typedef struct kern_memdesc {
272 u64 attribute;
273 u64 start;
274 u64 num_pages;
275 } kern_memdesc_t;
277 static kern_memdesc_t *kern_memmap;
/*
 * Size in bytes of the region described by an EFI memory descriptor.
 * The argument is parenthesised so that pointer expressions such as
 * "md + 1" expand correctly.
 */
#define efi_md_size(md)	((md)->num_pages << EFI_PAGE_SHIFT)
281 static inline u64
282 kmd_end(kern_memdesc_t *kmd)
284 return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
287 static inline u64
288 efi_md_end(efi_memory_desc_t *md)
290 return (md->phys_addr + efi_md_size(md));
293 static inline int
294 efi_wb(efi_memory_desc_t *md)
296 return (md->attribute & EFI_MEMORY_WB);
299 static inline int
300 efi_uc(efi_memory_desc_t *md)
302 return (md->attribute & EFI_MEMORY_UC);
305 static void
306 walk (efi_freemem_callback_t callback, void *arg, u64 attr)
308 kern_memdesc_t *k;
309 u64 start, end, voff;
311 voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
312 for (k = kern_memmap; k->start != ~0UL; k++) {
313 if (k->attribute != attr)
314 continue;
315 start = PAGE_ALIGN(k->start);
316 end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
317 if (start < end)
318 if ((*callback)(start + voff, end + voff, arg) < 0)
319 return;
324 * Walk the EFI memory map and call CALLBACK once for each EFI memory
325 * descriptor that has memory that is available for OS use.
327 void
328 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
330 walk(callback, arg, EFI_MEMORY_WB);
334 * Walk the EFI memory map and call CALLBACK once for each EFI memory
335 * descriptor that has memory that is available for uncached allocator.
337 void
338 efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
340 walk(callback, arg, EFI_MEMORY_UC);
344 * Look for the PAL_CODE region reported by EFI and map it using an
345 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
346 * Abstraction Layer chapter 11 in ADAG
348 void *
349 efi_get_pal_addr (void)
351 void *efi_map_start, *efi_map_end, *p;
352 efi_memory_desc_t *md;
353 u64 efi_desc_size;
354 int pal_code_count = 0;
355 u64 vaddr, mask;
357 efi_map_start = __va(ia64_boot_param->efi_memmap);
358 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
359 efi_desc_size = ia64_boot_param->efi_memdesc_size;
361 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
362 md = p;
363 if (md->type != EFI_PAL_CODE)
364 continue;
366 if (++pal_code_count > 1) {
367 printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
368 "dropped @ %llx\n", md->phys_addr);
369 continue;
372 * The only ITLB entry in region 7 that is used is the one
373 * installed by __start(). That entry covers a 64MB range.
375 mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
376 vaddr = PAGE_OFFSET + md->phys_addr;
379 * We must check that the PAL mapping won't overlap with the
380 * kernel mapping.
382 * PAL code is guaranteed to be aligned on a power of 2 between
383 * 4k and 256KB and that only one ITR is needed to map it. This
384 * implies that the PAL code is always aligned on its size,
385 * i.e., the closest matching page size supported by the TLB.
386 * Therefore PAL code is guaranteed never to cross a 64MB unless
387 * it is bigger than 64MB (very unlikely!). So for now the
388 * following test is enough to determine whether or not we need
389 * a dedicated ITR for the PAL code.
391 if ((vaddr & mask) == (KERNEL_START & mask)) {
392 printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
393 __func__);
394 continue;
397 if (efi_md_size(md) > IA64_GRANULE_SIZE)
398 panic("Whoa! PAL code size bigger than a granule!");
400 #if EFI_DEBUG
401 mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
403 printk(KERN_INFO "CPU %d: mapping PAL code "
404 "[0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
405 smp_processor_id(), md->phys_addr,
406 md->phys_addr + efi_md_size(md),
407 vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
408 #endif
409 return __va(md->phys_addr);
411 printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",
412 __func__);
413 return NULL;
417 static u8 __init palo_checksum(u8 *buffer, u32 length)
419 u8 sum = 0;
420 u8 *end = buffer + length;
422 while (buffer < end)
423 sum = (u8) (sum + *(buffer++));
425 return sum;
429 * Parse and handle PALO table which is published at:
430 * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
432 static void __init handle_palo(unsigned long phys_addr)
434 struct palo_table *palo = __va(phys_addr);
435 u8 checksum;
437 if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
438 printk(KERN_INFO "PALO signature incorrect.\n");
439 return;
442 checksum = palo_checksum((u8 *)palo, palo->length);
443 if (checksum) {
444 printk(KERN_INFO "PALO checksum incorrect.\n");
445 return;
448 setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
451 void
452 efi_map_pal_code (void)
454 void *pal_vaddr = efi_get_pal_addr ();
455 u64 psr;
457 if (!pal_vaddr)
458 return;
461 * Cannot write to CRx with PSR.ic=1
463 psr = ia64_clear_ic();
464 ia64_itr(0x1, IA64_TR_PALCODE,
465 GRANULEROUNDDOWN((unsigned long) pal_vaddr),
466 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
467 IA64_GRANULE_SHIFT);
468 ia64_set_psr(psr); /* restore psr */
471 void __init
472 efi_init (void)
474 void *efi_map_start, *efi_map_end;
475 efi_char16_t *c16;
476 u64 efi_desc_size;
477 char *cp, vendor[100] = "unknown";
478 int i;
480 set_bit(EFI_BOOT, &efi.flags);
481 set_bit(EFI_64BIT, &efi.flags);
484 * It's too early to be able to use the standard kernel command line
485 * support...
487 for (cp = boot_command_line; *cp; ) {
488 if (memcmp(cp, "mem=", 4) == 0) {
489 mem_limit = memparse(cp + 4, &cp);
490 } else if (memcmp(cp, "max_addr=", 9) == 0) {
491 max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
492 } else if (memcmp(cp, "min_addr=", 9) == 0) {
493 min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
494 } else {
495 while (*cp != ' ' && *cp)
496 ++cp;
497 while (*cp == ' ')
498 ++cp;
501 if (min_addr != 0UL)
502 printk(KERN_INFO "Ignoring memory below %lluMB\n",
503 min_addr >> 20);
504 if (max_addr != ~0UL)
505 printk(KERN_INFO "Ignoring memory above %lluMB\n",
506 max_addr >> 20);
508 efi.systab = __va(ia64_boot_param->efi_systab);
511 * Verify the EFI Table
513 if (efi.systab == NULL)
514 panic("Whoa! Can't find EFI system table.\n");
515 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
516 panic("Whoa! EFI system table signature incorrect\n");
517 if ((efi.systab->hdr.revision >> 16) == 0)
518 printk(KERN_WARNING "Warning: EFI system table version "
519 "%d.%02d, expected 1.00 or greater\n",
520 efi.systab->hdr.revision >> 16,
521 efi.systab->hdr.revision & 0xffff);
523 /* Show what we know for posterity */
524 c16 = __va(efi.systab->fw_vendor);
525 if (c16) {
526 for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i)
527 vendor[i] = *c16++;
528 vendor[i] = '\0';
531 printk(KERN_INFO "EFI v%u.%.02u by %s:",
532 efi.systab->hdr.revision >> 16,
533 efi.systab->hdr.revision & 0xffff, vendor);
535 palo_phys = EFI_INVALID_TABLE_ADDR;
537 if (efi_config_init(arch_tables) != 0)
538 return;
540 if (palo_phys != EFI_INVALID_TABLE_ADDR)
541 handle_palo(palo_phys);
543 runtime = __va(efi.systab->runtime);
544 efi.get_time = phys_get_time;
545 efi.set_time = phys_set_time;
546 efi.get_wakeup_time = phys_get_wakeup_time;
547 efi.set_wakeup_time = phys_set_wakeup_time;
548 efi.get_variable = phys_get_variable;
549 efi.get_next_variable = phys_get_next_variable;
550 efi.set_variable = phys_set_variable;
551 efi.get_next_high_mono_count = phys_get_next_high_mono_count;
552 efi.reset_system = phys_reset_system;
554 efi_map_start = __va(ia64_boot_param->efi_memmap);
555 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
556 efi_desc_size = ia64_boot_param->efi_memdesc_size;
558 #if EFI_DEBUG
559 /* print EFI memory map: */
561 efi_memory_desc_t *md;
562 void *p;
564 for (i = 0, p = efi_map_start; p < efi_map_end;
565 ++i, p += efi_desc_size)
567 const char *unit;
568 unsigned long size;
569 char buf[64];
571 md = p;
572 size = md->num_pages << EFI_PAGE_SHIFT;
574 if ((size >> 40) > 0) {
575 size >>= 40;
576 unit = "TB";
577 } else if ((size >> 30) > 0) {
578 size >>= 30;
579 unit = "GB";
580 } else if ((size >> 20) > 0) {
581 size >>= 20;
582 unit = "MB";
583 } else {
584 size >>= 10;
585 unit = "KB";
588 printk("mem%02d: %s "
589 "range=[0x%016lx-0x%016lx) (%4lu%s)\n",
590 i, efi_md_typeattr_format(buf, sizeof(buf), md),
591 md->phys_addr,
592 md->phys_addr + efi_md_size(md), size, unit);
595 #endif
597 efi_map_pal_code();
598 efi_enter_virtual_mode();
601 void
602 efi_enter_virtual_mode (void)
604 void *efi_map_start, *efi_map_end, *p;
605 efi_memory_desc_t *md;
606 efi_status_t status;
607 u64 efi_desc_size;
609 efi_map_start = __va(ia64_boot_param->efi_memmap);
610 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
611 efi_desc_size = ia64_boot_param->efi_memdesc_size;
613 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
614 md = p;
615 if (md->attribute & EFI_MEMORY_RUNTIME) {
617 * Some descriptors have multiple bits set, so the
618 * order of the tests is relevant.
620 if (md->attribute & EFI_MEMORY_WB) {
621 md->virt_addr = (u64) __va(md->phys_addr);
622 } else if (md->attribute & EFI_MEMORY_UC) {
623 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
624 } else if (md->attribute & EFI_MEMORY_WC) {
625 #if 0
626 md->virt_addr = ia64_remap(md->phys_addr,
627 (_PAGE_A |
628 _PAGE_P |
629 _PAGE_D |
630 _PAGE_MA_WC |
631 _PAGE_PL_0 |
632 _PAGE_AR_RW));
633 #else
634 printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
635 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
636 #endif
637 } else if (md->attribute & EFI_MEMORY_WT) {
638 #if 0
639 md->virt_addr = ia64_remap(md->phys_addr,
640 (_PAGE_A |
641 _PAGE_P |
642 _PAGE_D |
643 _PAGE_MA_WT |
644 _PAGE_PL_0 |
645 _PAGE_AR_RW));
646 #else
647 printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
648 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
649 #endif
654 status = efi_call_phys(__va(runtime->set_virtual_address_map),
655 ia64_boot_param->efi_memmap_size,
656 efi_desc_size,
657 ia64_boot_param->efi_memdesc_version,
658 ia64_boot_param->efi_memmap);
659 if (status != EFI_SUCCESS) {
660 printk(KERN_WARNING "warning: unable to switch EFI into "
661 "virtual mode (status=%lu)\n", status);
662 return;
665 set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
668 * Now that EFI is in virtual mode, we call the EFI functions more
669 * efficiently:
671 efi.get_time = virt_get_time;
672 efi.set_time = virt_set_time;
673 efi.get_wakeup_time = virt_get_wakeup_time;
674 efi.set_wakeup_time = virt_set_wakeup_time;
675 efi.get_variable = virt_get_variable;
676 efi.get_next_variable = virt_get_next_variable;
677 efi.set_variable = virt_set_variable;
678 efi.get_next_high_mono_count = virt_get_next_high_mono_count;
679 efi.reset_system = virt_reset_system;
683 * Walk the EFI memory map looking for the I/O port range. There can only be
684 * one entry of this type, other I/O port ranges should be described via ACPI.
687 efi_get_iobase (void)
689 void *efi_map_start, *efi_map_end, *p;
690 efi_memory_desc_t *md;
691 u64 efi_desc_size;
693 efi_map_start = __va(ia64_boot_param->efi_memmap);
694 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
695 efi_desc_size = ia64_boot_param->efi_memdesc_size;
697 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
698 md = p;
699 if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
700 if (md->attribute & EFI_MEMORY_UC)
701 return md->phys_addr;
704 return 0;
707 static struct kern_memdesc *
708 kern_memory_descriptor (unsigned long phys_addr)
710 struct kern_memdesc *md;
712 for (md = kern_memmap; md->start != ~0UL; md++) {
713 if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT))
714 return md;
716 return NULL;
719 static efi_memory_desc_t *
720 efi_memory_descriptor (unsigned long phys_addr)
722 void *efi_map_start, *efi_map_end, *p;
723 efi_memory_desc_t *md;
724 u64 efi_desc_size;
726 efi_map_start = __va(ia64_boot_param->efi_memmap);
727 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
728 efi_desc_size = ia64_boot_param->efi_memdesc_size;
730 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
731 md = p;
733 if (phys_addr - md->phys_addr < efi_md_size(md))
734 return md;
736 return NULL;
739 static int
740 efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
742 void *efi_map_start, *efi_map_end, *p;
743 efi_memory_desc_t *md;
744 u64 efi_desc_size;
745 unsigned long end;
747 efi_map_start = __va(ia64_boot_param->efi_memmap);
748 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
749 efi_desc_size = ia64_boot_param->efi_memdesc_size;
751 end = phys_addr + size;
753 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
754 md = p;
755 if (md->phys_addr < end && efi_md_end(md) > phys_addr)
756 return 1;
758 return 0;
762 efi_mem_type (unsigned long phys_addr)
764 efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
766 if (md)
767 return md->type;
768 return -EINVAL;
772 efi_mem_attributes (unsigned long phys_addr)
774 efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
776 if (md)
777 return md->attribute;
778 return 0;
780 EXPORT_SYMBOL(efi_mem_attributes);
783 efi_mem_attribute (unsigned long phys_addr, unsigned long size)
785 unsigned long end = phys_addr + size;
786 efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
787 u64 attr;
789 if (!md)
790 return 0;
793 * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells
794 * the kernel that firmware needs this region mapped.
796 attr = md->attribute & ~EFI_MEMORY_RUNTIME;
797 do {
798 unsigned long md_end = efi_md_end(md);
800 if (end <= md_end)
801 return attr;
803 md = efi_memory_descriptor(md_end);
804 if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
805 return 0;
806 } while (md);
807 return 0; /* never reached */
811 kern_mem_attribute (unsigned long phys_addr, unsigned long size)
813 unsigned long end = phys_addr + size;
814 struct kern_memdesc *md;
815 u64 attr;
818 * This is a hack for ioremap calls before we set up kern_memmap.
819 * Maybe we should do efi_memmap_init() earlier instead.
821 if (!kern_memmap) {
822 attr = efi_mem_attribute(phys_addr, size);
823 if (attr & EFI_MEMORY_WB)
824 return EFI_MEMORY_WB;
825 return 0;
828 md = kern_memory_descriptor(phys_addr);
829 if (!md)
830 return 0;
832 attr = md->attribute;
833 do {
834 unsigned long md_end = kmd_end(md);
836 if (end <= md_end)
837 return attr;
839 md = kern_memory_descriptor(md_end);
840 if (!md || md->attribute != attr)
841 return 0;
842 } while (md);
843 return 0; /* never reached */
845 EXPORT_SYMBOL(kern_mem_attribute);
848 valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
850 u64 attr;
853 * /dev/mem reads and writes use copy_to_user(), which implicitly
854 * uses a granule-sized kernel identity mapping. It's really
855 * only safe to do this for regions in kern_memmap. For more
856 * details, see Documentation/ia64/aliasing.txt.
858 attr = kern_mem_attribute(phys_addr, size);
859 if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
860 return 1;
861 return 0;
865 valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
867 unsigned long phys_addr = pfn << PAGE_SHIFT;
868 u64 attr;
870 attr = efi_mem_attribute(phys_addr, size);
873 * /dev/mem mmap uses normal user pages, so we don't need the entire
874 * granule, but the entire region we're mapping must support the same
875 * attribute.
877 if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
878 return 1;
881 * Intel firmware doesn't tell us about all the MMIO regions, so
882 * in general we have to allow mmap requests. But if EFI *does*
883 * tell us about anything inside this region, we should deny it.
884 * The user can always map a smaller region to avoid the overlap.
886 if (efi_memmap_intersects(phys_addr, size))
887 return 0;
889 return 1;
892 pgprot_t
893 phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size,
894 pgprot_t vma_prot)
896 unsigned long phys_addr = pfn << PAGE_SHIFT;
897 u64 attr;
900 * For /dev/mem mmap, we use user mappings, but if the region is
901 * in kern_memmap (and hence may be covered by a kernel mapping),
902 * we must use the same attribute as the kernel mapping.
904 attr = kern_mem_attribute(phys_addr, size);
905 if (attr & EFI_MEMORY_WB)
906 return pgprot_cacheable(vma_prot);
907 else if (attr & EFI_MEMORY_UC)
908 return pgprot_noncached(vma_prot);
911 * Some chipsets don't support UC access to memory. If
912 * WB is supported, we prefer that.
914 if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
915 return pgprot_cacheable(vma_prot);
917 return pgprot_noncached(vma_prot);
920 int __init
921 efi_uart_console_only(void)
923 efi_status_t status;
924 char *s, name[] = "ConOut";
925 efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
926 efi_char16_t *utf16, name_utf16[32];
927 unsigned char data[1024];
928 unsigned long size = sizeof(data);
929 struct efi_generic_dev_path *hdr, *end_addr;
930 int uart = 0;
932 /* Convert to UTF-16 */
933 utf16 = name_utf16;
934 s = name;
935 while (*s)
936 *utf16++ = *s++ & 0x7f;
937 *utf16 = 0;
939 status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
940 if (status != EFI_SUCCESS) {
941 printk(KERN_ERR "No EFI %s variable?\n", name);
942 return 0;
945 hdr = (struct efi_generic_dev_path *) data;
946 end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
947 while (hdr < end_addr) {
948 if (hdr->type == EFI_DEV_MSG &&
949 hdr->sub_type == EFI_DEV_MSG_UART)
950 uart = 1;
951 else if (hdr->type == EFI_DEV_END_PATH ||
952 hdr->type == EFI_DEV_END_PATH2) {
953 if (!uart)
954 return 0;
955 if (hdr->sub_type == EFI_DEV_END_ENTIRE)
956 return 1;
957 uart = 0;
959 hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length);
961 printk(KERN_ERR "Malformed %s value\n", name);
962 return 0;
966 * Look for the first granule aligned memory descriptor memory
967 * that is big enough to hold EFI memory map. Make sure this
968 * descriptor is at least granule sized so it does not get trimmed
970 struct kern_memdesc *
971 find_memmap_space (void)
973 u64 contig_low=0, contig_high=0;
974 u64 as = 0, ae;
975 void *efi_map_start, *efi_map_end, *p, *q;
976 efi_memory_desc_t *md, *pmd = NULL, *check_md;
977 u64 space_needed, efi_desc_size;
978 unsigned long total_mem = 0;
980 efi_map_start = __va(ia64_boot_param->efi_memmap);
981 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
982 efi_desc_size = ia64_boot_param->efi_memdesc_size;
985 * Worst case: we need 3 kernel descriptors for each efi descriptor
986 * (if every entry has a WB part in the middle, and UC head and tail),
987 * plus one for the end marker.
989 space_needed = sizeof(kern_memdesc_t) *
990 (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
992 for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
993 md = p;
994 if (!efi_wb(md)) {
995 continue;
997 if (pmd == NULL || !efi_wb(pmd) ||
998 efi_md_end(pmd) != md->phys_addr) {
999 contig_low = GRANULEROUNDUP(md->phys_addr);
1000 contig_high = efi_md_end(md);
1001 for (q = p + efi_desc_size; q < efi_map_end;
1002 q += efi_desc_size) {
1003 check_md = q;
1004 if (!efi_wb(check_md))
1005 break;
1006 if (contig_high != check_md->phys_addr)
1007 break;
1008 contig_high = efi_md_end(check_md);
1010 contig_high = GRANULEROUNDDOWN(contig_high);
1012 if (!is_memory_available(md) || md->type == EFI_LOADER_DATA)
1013 continue;
1015 /* Round ends inward to granule boundaries */
1016 as = max(contig_low, md->phys_addr);
1017 ae = min(contig_high, efi_md_end(md));
1019 /* keep within max_addr= and min_addr= command line arg */
1020 as = max(as, min_addr);
1021 ae = min(ae, max_addr);
1022 if (ae <= as)
1023 continue;
1025 /* avoid going over mem= command line arg */
1026 if (total_mem + (ae - as) > mem_limit)
1027 ae -= total_mem + (ae - as) - mem_limit;
1029 if (ae <= as)
1030 continue;
1032 if (ae - as > space_needed)
1033 break;
1035 if (p >= efi_map_end)
1036 panic("Can't allocate space for kernel memory descriptors");
1038 return __va(as);
1042 * Walk the EFI memory map and gather all memory available for kernel
1043 * to use. We can allocate partial granules only if the unavailable
1044 * parts exist, and are WB.
1046 unsigned long
1047 efi_memmap_init(u64 *s, u64 *e)
1049 struct kern_memdesc *k, *prev = NULL;
1050 u64 contig_low=0, contig_high=0;
1051 u64 as, ae, lim;
1052 void *efi_map_start, *efi_map_end, *p, *q;
1053 efi_memory_desc_t *md, *pmd = NULL, *check_md;
1054 u64 efi_desc_size;
1055 unsigned long total_mem = 0;
1057 k = kern_memmap = find_memmap_space();
1059 efi_map_start = __va(ia64_boot_param->efi_memmap);
1060 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
1061 efi_desc_size = ia64_boot_param->efi_memdesc_size;
1063 for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
1064 md = p;
1065 if (!efi_wb(md)) {
1066 if (efi_uc(md) &&
1067 (md->type == EFI_CONVENTIONAL_MEMORY ||
1068 md->type == EFI_BOOT_SERVICES_DATA)) {
1069 k->attribute = EFI_MEMORY_UC;
1070 k->start = md->phys_addr;
1071 k->num_pages = md->num_pages;
1072 k++;
1074 continue;
1076 if (pmd == NULL || !efi_wb(pmd) ||
1077 efi_md_end(pmd) != md->phys_addr) {
1078 contig_low = GRANULEROUNDUP(md->phys_addr);
1079 contig_high = efi_md_end(md);
1080 for (q = p + efi_desc_size; q < efi_map_end;
1081 q += efi_desc_size) {
1082 check_md = q;
1083 if (!efi_wb(check_md))
1084 break;
1085 if (contig_high != check_md->phys_addr)
1086 break;
1087 contig_high = efi_md_end(check_md);
1089 contig_high = GRANULEROUNDDOWN(contig_high);
1091 if (!is_memory_available(md))
1092 continue;
1095 * Round ends inward to granule boundaries
1096 * Give trimmings to uncached allocator
1098 if (md->phys_addr < contig_low) {
1099 lim = min(efi_md_end(md), contig_low);
1100 if (efi_uc(md)) {
1101 if (k > kern_memmap &&
1102 (k-1)->attribute == EFI_MEMORY_UC &&
1103 kmd_end(k-1) == md->phys_addr) {
1104 (k-1)->num_pages +=
1105 (lim - md->phys_addr)
1106 >> EFI_PAGE_SHIFT;
1107 } else {
1108 k->attribute = EFI_MEMORY_UC;
1109 k->start = md->phys_addr;
1110 k->num_pages = (lim - md->phys_addr)
1111 >> EFI_PAGE_SHIFT;
1112 k++;
1115 as = contig_low;
1116 } else
1117 as = md->phys_addr;
1119 if (efi_md_end(md) > contig_high) {
1120 lim = max(md->phys_addr, contig_high);
1121 if (efi_uc(md)) {
1122 if (lim == md->phys_addr && k > kern_memmap &&
1123 (k-1)->attribute == EFI_MEMORY_UC &&
1124 kmd_end(k-1) == md->phys_addr) {
1125 (k-1)->num_pages += md->num_pages;
1126 } else {
1127 k->attribute = EFI_MEMORY_UC;
1128 k->start = lim;
1129 k->num_pages = (efi_md_end(md) - lim)
1130 >> EFI_PAGE_SHIFT;
1131 k++;
1134 ae = contig_high;
1135 } else
1136 ae = efi_md_end(md);
1138 /* keep within max_addr= and min_addr= command line arg */
1139 as = max(as, min_addr);
1140 ae = min(ae, max_addr);
1141 if (ae <= as)
1142 continue;
1144 /* avoid going over mem= command line arg */
1145 if (total_mem + (ae - as) > mem_limit)
1146 ae -= total_mem + (ae - as) - mem_limit;
1148 if (ae <= as)
1149 continue;
1150 if (prev && kmd_end(prev) == md->phys_addr) {
1151 prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
1152 total_mem += ae - as;
1153 continue;
1155 k->attribute = EFI_MEMORY_WB;
1156 k->start = as;
1157 k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
1158 total_mem += ae - as;
1159 prev = k++;
1161 k->start = ~0L; /* end-marker */
1163 /* reserve the memory we are using for kern_memmap */
1164 *s = (u64)kern_memmap;
1165 *e = (u64)++k;
1167 return total_mem;
1170 void
1171 efi_initialize_iomem_resources(struct resource *code_resource,
1172 struct resource *data_resource,
1173 struct resource *bss_resource)
1175 struct resource *res;
1176 void *efi_map_start, *efi_map_end, *p;
1177 efi_memory_desc_t *md;
1178 u64 efi_desc_size;
1179 char *name;
1180 unsigned long flags, desc;
1182 efi_map_start = __va(ia64_boot_param->efi_memmap);
1183 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
1184 efi_desc_size = ia64_boot_param->efi_memdesc_size;
1186 res = NULL;
1188 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
1189 md = p;
1191 if (md->num_pages == 0) /* should not happen */
1192 continue;
1194 flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1195 desc = IORES_DESC_NONE;
1197 switch (md->type) {
1199 case EFI_MEMORY_MAPPED_IO:
1200 case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
1201 continue;
1203 case EFI_LOADER_CODE:
1204 case EFI_LOADER_DATA:
1205 case EFI_BOOT_SERVICES_DATA:
1206 case EFI_BOOT_SERVICES_CODE:
1207 case EFI_CONVENTIONAL_MEMORY:
1208 if (md->attribute & EFI_MEMORY_WP) {
1209 name = "System ROM";
1210 flags |= IORESOURCE_READONLY;
1211 } else if (md->attribute == EFI_MEMORY_UC) {
1212 name = "Uncached RAM";
1213 } else {
1214 name = "System RAM";
1215 flags |= IORESOURCE_SYSRAM;
1217 break;
1219 case EFI_ACPI_MEMORY_NVS:
1220 name = "ACPI Non-volatile Storage";
1221 desc = IORES_DESC_ACPI_NV_STORAGE;
1222 break;
1224 case EFI_UNUSABLE_MEMORY:
1225 name = "reserved";
1226 flags |= IORESOURCE_DISABLED;
1227 break;
1229 case EFI_PERSISTENT_MEMORY:
1230 name = "Persistent Memory";
1231 desc = IORES_DESC_PERSISTENT_MEMORY;
1232 break;
1234 case EFI_RESERVED_TYPE:
1235 case EFI_RUNTIME_SERVICES_CODE:
1236 case EFI_RUNTIME_SERVICES_DATA:
1237 case EFI_ACPI_RECLAIM_MEMORY:
1238 default:
1239 name = "reserved";
1240 break;
1243 if ((res = kzalloc(sizeof(struct resource),
1244 GFP_KERNEL)) == NULL) {
1245 printk(KERN_ERR
1246 "failed to allocate resource for iomem\n");
1247 return;
1250 res->name = name;
1251 res->start = md->phys_addr;
1252 res->end = md->phys_addr + efi_md_size(md) - 1;
1253 res->flags = flags;
1254 res->desc = desc;
1256 if (insert_resource(&iomem_resource, res) < 0)
1257 kfree(res);
1258 else {
1260 * We don't know which region contains
1261 * kernel data so we try it repeatedly and
1262 * let the resource manager test it.
1264 insert_resource(res, code_resource);
1265 insert_resource(res, data_resource);
1266 insert_resource(res, bss_resource);
1267 #ifdef CONFIG_KEXEC
1268 insert_resource(res, &efi_memmap_res);
1269 insert_resource(res, &boot_param_res);
1270 if (crashk_res.end > crashk_res.start)
1271 insert_resource(res, &crashk_res);
1272 #endif
#ifdef CONFIG_KEXEC
/*
 * Find a block of SIZE bytes of WB memory, aligned to 64M, that avoids
 * the N reserved regions in R (rsvd_regions are sorted).
 *
 * Returns the physical start address of the block, or ~0UL (after
 * logging a warning) if none is found.
 */
unsigned long __init
kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
{
	void *map_start = __va(ia64_boot_param->efi_memmap);
	void *map_end = map_start + ia64_boot_param->efi_memmap_size;
	u64 stride = ia64_boot_param->efi_memdesc_size;
	u64 alignment = 1UL << _PAGE_SIZE_64M;
	efi_memory_desc_t *md;
	u64 start, end;
	void *cur;
	int i;

	for (cur = map_start; cur < map_end; cur += stride) {
		md = cur;
		if (!efi_wb(md))
			continue;
		start = ALIGN(md->phys_addr, alignment);
		end = efi_md_end(md);
		for (i = 0; i < n; i++) {
			/* only reserved regions inside [start, end) matter */
			if (!(__pa(r[i].start) >= start && __pa(r[i].end) < end))
				continue;

			if (__pa(r[i].start) > start + size)
				return start;

			/* restart the candidate after this reserved region */
			start = ALIGN(__pa(r[i].end), alignment);
			if (!(i < n-1 && __pa(r[i+1].start) < start + size))
				break;
		}
		if (end > start + size)
			return start;
	}

	printk(KERN_WARNING
	       "Cannot reserve 0x%lx byte of memory for crashdump\n", size);
	return ~0UL;
}
#endif
#ifdef CONFIG_CRASH_DUMP
/*
 * Return the size in bytes of the WB EFI_LOADER_DATA descriptor that
 * starts exactly at ADDRESS.  Returns 0, after logging a warning, if no
 * such descriptor is found or if its size is 0.
 */
unsigned long __init
vmcore_find_descriptor_size (unsigned long address)
{
	void *map_start = __va(ia64_boot_param->efi_memmap);
	void *map_end = map_start + ia64_boot_param->efi_memmap_size;
	u64 stride = ia64_boot_param->efi_memdesc_size;
	unsigned long found_size = 0;
	efi_memory_desc_t *md;
	void *cur;

	for (cur = map_start; cur < map_end; cur += stride) {
		md = cur;
		if (!efi_wb(md) || md->type != EFI_LOADER_DATA)
			continue;
		if (md->phys_addr != address)
			continue;

		found_size = efi_md_size(md);
		break;
	}

	if (found_size == 0)
		printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n");

	return found_size;
}
#endif