2 * x86_64 specific EFI support functions
3 * Based on Extensible Firmware Interface Specification version 1.0
5 * Copyright (C) 2005-2008 Intel Co.
6 * Fenghua Yu <fenghua.yu@intel.com>
7 * Bibo Mao <bibo.mao@intel.com>
8 * Chandramouli Narayanan <mouli@linux.intel.com>
9 * Huang Ying <ying.huang@intel.com>
11 * Code to convert EFI to E820 map has been implemented in elilo bootloader
12 * based on an EFI patch by Edgar Hucek. Based on the E820 map, the page table
13 * is set up appropriately for EFI runtime code.
18 #define pr_fmt(fmt) "efi: " fmt
20 #include <linux/kernel.h>
21 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/spinlock.h>
25 #include <linux/bootmem.h>
26 #include <linux/ioport.h>
27 #include <linux/init.h>
28 #include <linux/mc146818rtc.h>
29 #include <linux/efi.h>
30 #include <linux/uaccess.h>
32 #include <linux/reboot.h>
33 #include <linux/slab.h>
34 #include <linux/ucs2_string.h>
36 #include <asm/setup.h>
39 #include <asm/pgtable.h>
40 #include <asm/tlbflush.h>
41 #include <asm/proto.h>
43 #include <asm/cacheflush.h>
44 #include <asm/fixmap.h>
45 #include <asm/realmode.h>
47 #include <asm/pgalloc.h>
48 #include <asm/sections.h>
51 * We allocate runtime services regions top-down, starting from -4G, i.e.
52 * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
54 static u64 efi_va
= EFI_VA_START
;
56 struct efi_scratch efi_scratch
;
58 static void __init
early_code_mapping_set_exec(int executable
)
60 efi_memory_desc_t
*md
;
62 if (!(__supported_pte_mask
& _PAGE_NX
))
65 /* Make EFI service code area executable */
66 for_each_efi_memory_desc(md
) {
67 if (md
->type
== EFI_RUNTIME_SERVICES_CODE
||
68 md
->type
== EFI_BOOT_SERVICES_CODE
)
69 efi_set_executable(md
, executable
);
73 pgd_t
* __init
efi_call_phys_prolog(void)
75 unsigned long vaddress
;
81 if (!efi_enabled(EFI_OLD_MEMMAP
)) {
82 save_pgd
= (pgd_t
*)read_cr3();
83 write_cr3((unsigned long)efi_scratch
.efi_pgt
);
87 early_code_mapping_set_exec(1);
89 n_pgds
= DIV_ROUND_UP((max_pfn
<< PAGE_SHIFT
), PGDIR_SIZE
);
90 save_pgd
= kmalloc_array(n_pgds
, sizeof(*save_pgd
), GFP_KERNEL
);
92 for (pgd
= 0; pgd
< n_pgds
; pgd
++) {
93 save_pgd
[pgd
] = *pgd_offset_k(pgd
* PGDIR_SIZE
);
94 vaddress
= (unsigned long)__va(pgd
* PGDIR_SIZE
);
95 set_pgd(pgd_offset_k(pgd
* PGDIR_SIZE
), *pgd_offset_k(vaddress
));
103 void __init
efi_call_phys_epilog(pgd_t
*save_pgd
)
106 * After the lock is released, the original page table is restored.
111 if (!efi_enabled(EFI_OLD_MEMMAP
)) {
112 write_cr3((unsigned long)save_pgd
);
117 nr_pgds
= DIV_ROUND_UP((max_pfn
<< PAGE_SHIFT
) , PGDIR_SIZE
);
119 for (pgd_idx
= 0; pgd_idx
< nr_pgds
; pgd_idx
++)
120 set_pgd(pgd_offset_k(pgd_idx
* PGDIR_SIZE
), save_pgd
[pgd_idx
]);
125 early_code_mapping_set_exec(0);
128 static pgd_t
*efi_pgd
;
131 * We need our own copy of the higher levels of the page tables
132 * because we want to avoid inserting EFI region mappings (EFI_VA_END
133 * to EFI_VA_START) into the standard kernel page tables. Everything
134 * else can be shared, see efi_sync_low_kernel_mappings().
136 int __init
efi_alloc_page_tables(void)
142 if (efi_enabled(EFI_OLD_MEMMAP
))
145 gfp_mask
= GFP_KERNEL
| __GFP_NOTRACK
| __GFP_ZERO
;
146 efi_pgd
= (pgd_t
*)__get_free_pages(gfp_mask
, PGD_ALLOCATION_ORDER
);
150 pgd
= efi_pgd
+ pgd_index(EFI_VA_END
);
152 pud
= pud_alloc_one(NULL
, 0);
154 free_page((unsigned long)efi_pgd
);
158 pgd_populate(NULL
, pgd
, pud
);
164 * Add low kernel mappings for passing arguments to EFI functions.
166 void efi_sync_low_kernel_mappings(void)
168 unsigned num_entries
;
169 pgd_t
*pgd_k
, *pgd_efi
;
170 pud_t
*pud_k
, *pud_efi
;
172 if (efi_enabled(EFI_OLD_MEMMAP
))
176 * We can share all PGD entries apart from the one entry that
177 * covers the EFI runtime mapping space.
179 * Make sure the EFI runtime region mappings are guaranteed to
180 * only span a single PGD entry and that the entry also maps
181 * other important kernel regions.
183 BUILD_BUG_ON(pgd_index(EFI_VA_END
) != pgd_index(MODULES_END
));
184 BUILD_BUG_ON((EFI_VA_START
& PGDIR_MASK
) !=
185 (EFI_VA_END
& PGDIR_MASK
));
187 pgd_efi
= efi_pgd
+ pgd_index(PAGE_OFFSET
);
188 pgd_k
= pgd_offset_k(PAGE_OFFSET
);
190 num_entries
= pgd_index(EFI_VA_END
) - pgd_index(PAGE_OFFSET
);
191 memcpy(pgd_efi
, pgd_k
, sizeof(pgd_t
) * num_entries
);
194 * We share all the PUD entries apart from those that map the
195 * EFI regions. Copy around them.
197 BUILD_BUG_ON((EFI_VA_START
& ~PUD_MASK
) != 0);
198 BUILD_BUG_ON((EFI_VA_END
& ~PUD_MASK
) != 0);
200 pgd_efi
= efi_pgd
+ pgd_index(EFI_VA_END
);
201 pud_efi
= pud_offset(pgd_efi
, 0);
203 pgd_k
= pgd_offset_k(EFI_VA_END
);
204 pud_k
= pud_offset(pgd_k
, 0);
206 num_entries
= pud_index(EFI_VA_END
);
207 memcpy(pud_efi
, pud_k
, sizeof(pud_t
) * num_entries
);
209 pud_efi
= pud_offset(pgd_efi
, EFI_VA_START
);
210 pud_k
= pud_offset(pgd_k
, EFI_VA_START
);
212 num_entries
= PTRS_PER_PUD
- pud_index(EFI_VA_START
);
213 memcpy(pud_efi
, pud_k
, sizeof(pud_t
) * num_entries
);
217 * Wrapper for slow_virt_to_phys() that handles NULL addresses.
219 static inline phys_addr_t
220 virt_to_phys_or_null_size(void *va
, unsigned long size
)
227 if (virt_addr_valid(va
))
228 return virt_to_phys(va
);
231 * A fully aligned variable on the stack is guaranteed not to
232 * cross a page boundary. Try to catch strings on the stack by
233 * checking that 'size' is a power of two.
235 bad_size
= size
> PAGE_SIZE
|| !is_power_of_2(size
);
237 WARN_ON(!IS_ALIGNED((unsigned long)va
, size
) || bad_size
);
239 return slow_virt_to_phys(va
);
242 #define virt_to_phys_or_null(addr) \
243 virt_to_phys_or_null_size((addr), sizeof(*(addr)))
245 int __init
efi_setup_page_tables(unsigned long pa_memmap
, unsigned num_pages
)
247 unsigned long pfn
, text
;
252 if (efi_enabled(EFI_OLD_MEMMAP
))
255 efi_scratch
.efi_pgt
= (pgd_t
*)__pa(efi_pgd
);
259 * It can happen that the physical address of new_memmap lands in memory
260 * which is not mapped in the EFI page table. Therefore we need to go
261 * and ident-map those pages containing the map before calling
262 * phys_efi_set_virtual_address_map().
264 pfn
= pa_memmap
>> PAGE_SHIFT
;
265 if (kernel_map_pages_in_pgd(pgd
, pfn
, pa_memmap
, num_pages
, _PAGE_NX
| _PAGE_RW
)) {
266 pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap
);
270 efi_scratch
.use_pgd
= true;
273 * Certain firmware versions are way too sentimental and still believe
274 * they are exclusive and unquestionable owners of the first physical page,
275 * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY
276 * (but then write-access it later during SetVirtualAddressMap()).
278 * Create a 1:1 mapping for this page, to avoid triple faults during early
279 * boot with such firmware. We are free to hand this page to the BIOS,
280 * as trim_bios_range() will reserve the first page and isolate it away
281 * from memory allocators anyway.
283 if (kernel_map_pages_in_pgd(pgd
, 0x0, 0x0, 1, _PAGE_RW
)) {
284 pr_err("Failed to create 1:1 mapping for the first page!\n");
289 * When making calls to the firmware everything needs to be 1:1
290 * mapped and addressable with 32-bit pointers. Map the kernel
291 * text and allocate a new stack because we can't rely on the
292 * stack pointer being < 4GB.
294 if (!IS_ENABLED(CONFIG_EFI_MIXED
) || efi_is_native())
297 page
= alloc_page(GFP_KERNEL
|__GFP_DMA32
);
299 panic("Unable to allocate EFI runtime stack < 4GB\n");
301 efi_scratch
.phys_stack
= virt_to_phys(page_address(page
));
302 efi_scratch
.phys_stack
+= PAGE_SIZE
; /* stack grows down */
304 npages
= (_etext
- _text
) >> PAGE_SHIFT
;
306 pfn
= text
>> PAGE_SHIFT
;
308 if (kernel_map_pages_in_pgd(pgd
, pfn
, text
, npages
, _PAGE_RW
)) {
309 pr_err("Failed to map kernel text 1:1\n");
316 static void __init
__map_region(efi_memory_desc_t
*md
, u64 va
)
318 unsigned long flags
= _PAGE_RW
;
320 pgd_t
*pgd
= efi_pgd
;
322 if (!(md
->attribute
& EFI_MEMORY_WB
))
325 pfn
= md
->phys_addr
>> PAGE_SHIFT
;
326 if (kernel_map_pages_in_pgd(pgd
, pfn
, va
, md
->num_pages
, flags
))
327 pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
331 void __init
efi_map_region(efi_memory_desc_t
*md
)
333 unsigned long size
= md
->num_pages
<< PAGE_SHIFT
;
334 u64 pa
= md
->phys_addr
;
336 if (efi_enabled(EFI_OLD_MEMMAP
))
337 return old_map_region(md
);
340 * Make sure the 1:1 mappings are present as a catch-all for b0rked
341 * firmware which doesn't update all internal pointers after switching
342 * to virtual mode and would otherwise crap on us.
344 __map_region(md
, md
->phys_addr
);
347 * Enforce the 1:1 mapping as the default virtual address when
348 * booting in EFI mixed mode, because even though we may be
349 * running a 64-bit kernel, the firmware may only be 32-bit.
351 if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED
)) {
352 md
->virt_addr
= md
->phys_addr
;
358 /* Is PA 2M-aligned? */
359 if (!(pa
& (PMD_SIZE
- 1))) {
362 u64 pa_offset
= pa
& (PMD_SIZE
- 1);
363 u64 prev_va
= efi_va
;
365 /* get us the same offset within this 2M page */
366 efi_va
= (efi_va
& PMD_MASK
) + pa_offset
;
368 if (efi_va
> prev_va
)
372 if (efi_va
< EFI_VA_END
) {
373 pr_warn(FW_WARN
"VA address range overflow!\n");
378 __map_region(md
, efi_va
);
379 md
->virt_addr
= efi_va
;
383 * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges.
384 * md->virt_addr is the original virtual address which had been mapped in kexec
387 void __init
efi_map_region_fixed(efi_memory_desc_t
*md
)
389 __map_region(md
, md
->phys_addr
);
390 __map_region(md
, md
->virt_addr
);
393 void __iomem
*__init
efi_ioremap(unsigned long phys_addr
, unsigned long size
,
394 u32 type
, u64 attribute
)
396 unsigned long last_map_pfn
;
398 if (type
== EFI_MEMORY_MAPPED_IO
)
399 return ioremap(phys_addr
, size
);
401 last_map_pfn
= init_memory_mapping(phys_addr
, phys_addr
+ size
);
402 if ((last_map_pfn
<< PAGE_SHIFT
) < phys_addr
+ size
) {
403 unsigned long top
= last_map_pfn
<< PAGE_SHIFT
;
404 efi_ioremap(top
, size
- (top
- phys_addr
), type
, attribute
);
407 if (!(attribute
& EFI_MEMORY_WB
))
408 efi_memory_uc((u64
)(unsigned long)__va(phys_addr
), size
);
410 return (void __iomem
*)__va(phys_addr
);
413 void __init
parse_efi_setup(u64 phys_addr
, u32 data_len
)
415 efi_setup
= phys_addr
+ sizeof(struct setup_data
);
418 void __init
efi_runtime_update_mappings(void)
421 pgd_t
*pgd
= efi_pgd
;
422 efi_memory_desc_t
*md
;
424 if (efi_enabled(EFI_OLD_MEMMAP
)) {
425 if (__supported_pte_mask
& _PAGE_NX
)
426 runtime_code_page_mkexec();
430 if (!efi_enabled(EFI_NX_PE_DATA
))
433 for_each_efi_memory_desc(md
) {
434 unsigned long pf
= 0;
436 if (!(md
->attribute
& EFI_MEMORY_RUNTIME
))
439 if (!(md
->attribute
& EFI_MEMORY_WB
))
442 if ((md
->attribute
& EFI_MEMORY_XP
) ||
443 (md
->type
== EFI_RUNTIME_SERVICES_DATA
))
446 if (!(md
->attribute
& EFI_MEMORY_RO
) &&
447 (md
->type
!= EFI_RUNTIME_SERVICES_CODE
))
450 /* Update the 1:1 mapping */
451 pfn
= md
->phys_addr
>> PAGE_SHIFT
;
452 if (kernel_map_pages_in_pgd(pgd
, pfn
, md
->phys_addr
, md
->num_pages
, pf
))
453 pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
454 md
->phys_addr
, md
->virt_addr
);
456 if (kernel_map_pages_in_pgd(pgd
, pfn
, md
->virt_addr
, md
->num_pages
, pf
))
457 pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
458 md
->phys_addr
, md
->virt_addr
);
462 void __init
efi_dump_pagetable(void)
464 #ifdef CONFIG_EFI_PGT_DUMP
465 ptdump_walk_pgd_level(NULL
, efi_pgd
);
469 #ifdef CONFIG_EFI_MIXED
470 extern efi_status_t
efi64_thunk(u32
, ...);
472 #define runtime_service32(func) \
474 u32 table = (u32)(unsigned long)efi.systab; \
477 rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \
478 ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \
483 * Switch to the EFI page tables early so that we can access the 1:1
484 * runtime services mappings which are not mapped in any other page
485 * tables. This function must be called before runtime_service32().
487 * Also, disable interrupts because the IDT points to 64-bit handlers,
488 * which aren't going to function correctly when we switch to 32-bit.
490 #define efi_thunk(f, ...) \
493 unsigned long __flags; \
496 local_irq_save(__flags); \
497 arch_efi_call_virt_setup(); \
499 __func = runtime_service32(f); \
500 __s = efi64_thunk(__func, __VA_ARGS__); \
502 arch_efi_call_virt_teardown(); \
503 local_irq_restore(__flags); \
508 efi_status_t
efi_thunk_set_virtual_address_map(
509 void *phys_set_virtual_address_map
,
510 unsigned long memory_map_size
,
511 unsigned long descriptor_size
,
512 u32 descriptor_version
,
513 efi_memory_desc_t
*virtual_map
)
519 efi_sync_low_kernel_mappings();
520 local_irq_save(flags
);
522 efi_scratch
.prev_cr3
= read_cr3();
523 write_cr3((unsigned long)efi_scratch
.efi_pgt
);
526 func
= (u32
)(unsigned long)phys_set_virtual_address_map
;
527 status
= efi64_thunk(func
, memory_map_size
, descriptor_size
,
528 descriptor_version
, virtual_map
);
530 write_cr3(efi_scratch
.prev_cr3
);
532 local_irq_restore(flags
);
537 static efi_status_t
efi_thunk_get_time(efi_time_t
*tm
, efi_time_cap_t
*tc
)
540 u32 phys_tm
, phys_tc
;
542 spin_lock(&rtc_lock
);
544 phys_tm
= virt_to_phys_or_null(tm
);
545 phys_tc
= virt_to_phys_or_null(tc
);
547 status
= efi_thunk(get_time
, phys_tm
, phys_tc
);
549 spin_unlock(&rtc_lock
);
554 static efi_status_t
efi_thunk_set_time(efi_time_t
*tm
)
559 spin_lock(&rtc_lock
);
561 phys_tm
= virt_to_phys_or_null(tm
);
563 status
= efi_thunk(set_time
, phys_tm
);
565 spin_unlock(&rtc_lock
);
571 efi_thunk_get_wakeup_time(efi_bool_t
*enabled
, efi_bool_t
*pending
,
575 u32 phys_enabled
, phys_pending
, phys_tm
;
577 spin_lock(&rtc_lock
);
579 phys_enabled
= virt_to_phys_or_null(enabled
);
580 phys_pending
= virt_to_phys_or_null(pending
);
581 phys_tm
= virt_to_phys_or_null(tm
);
583 status
= efi_thunk(get_wakeup_time
, phys_enabled
,
584 phys_pending
, phys_tm
);
586 spin_unlock(&rtc_lock
);
592 efi_thunk_set_wakeup_time(efi_bool_t enabled
, efi_time_t
*tm
)
597 spin_lock(&rtc_lock
);
599 phys_tm
= virt_to_phys_or_null(tm
);
601 status
= efi_thunk(set_wakeup_time
, enabled
, phys_tm
);
603 spin_unlock(&rtc_lock
);
608 static unsigned long efi_name_size(efi_char16_t
*name
)
610 return ucs2_strsize(name
, EFI_VAR_NAME_LEN
) + 1;
614 efi_thunk_get_variable(efi_char16_t
*name
, efi_guid_t
*vendor
,
615 u32
*attr
, unsigned long *data_size
, void *data
)
618 u32 phys_name
, phys_vendor
, phys_attr
;
619 u32 phys_data_size
, phys_data
;
621 phys_data_size
= virt_to_phys_or_null(data_size
);
622 phys_vendor
= virt_to_phys_or_null(vendor
);
623 phys_name
= virt_to_phys_or_null_size(name
, efi_name_size(name
));
624 phys_attr
= virt_to_phys_or_null(attr
);
625 phys_data
= virt_to_phys_or_null_size(data
, *data_size
);
627 status
= efi_thunk(get_variable
, phys_name
, phys_vendor
,
628 phys_attr
, phys_data_size
, phys_data
);
634 efi_thunk_set_variable(efi_char16_t
*name
, efi_guid_t
*vendor
,
635 u32 attr
, unsigned long data_size
, void *data
)
637 u32 phys_name
, phys_vendor
, phys_data
;
640 phys_name
= virt_to_phys_or_null_size(name
, efi_name_size(name
));
641 phys_vendor
= virt_to_phys_or_null(vendor
);
642 phys_data
= virt_to_phys_or_null_size(data
, data_size
);
644 /* If data_size is > sizeof(u32) we've got problems */
645 status
= efi_thunk(set_variable
, phys_name
, phys_vendor
,
646 attr
, data_size
, phys_data
);
652 efi_thunk_get_next_variable(unsigned long *name_size
,
657 u32 phys_name_size
, phys_name
, phys_vendor
;
659 phys_name_size
= virt_to_phys_or_null(name_size
);
660 phys_vendor
= virt_to_phys_or_null(vendor
);
661 phys_name
= virt_to_phys_or_null_size(name
, *name_size
);
663 status
= efi_thunk(get_next_variable
, phys_name_size
,
664 phys_name
, phys_vendor
);
670 efi_thunk_get_next_high_mono_count(u32
*count
)
675 phys_count
= virt_to_phys_or_null(count
);
676 status
= efi_thunk(get_next_high_mono_count
, phys_count
);
682 efi_thunk_reset_system(int reset_type
, efi_status_t status
,
683 unsigned long data_size
, efi_char16_t
*data
)
687 phys_data
= virt_to_phys_or_null_size(data
, data_size
);
689 efi_thunk(reset_system
, reset_type
, status
, data_size
, phys_data
);
693 efi_thunk_update_capsule(efi_capsule_header_t
**capsules
,
694 unsigned long count
, unsigned long sg_list
)
697 * To properly support this function we would need to repackage
698 * 'capsules' because the firmware doesn't understand 64-bit
701 return EFI_UNSUPPORTED
;
705 efi_thunk_query_variable_info(u32 attr
, u64
*storage_space
,
706 u64
*remaining_space
,
707 u64
*max_variable_size
)
710 u32 phys_storage
, phys_remaining
, phys_max
;
712 if (efi
.runtime_version
< EFI_2_00_SYSTEM_TABLE_REVISION
)
713 return EFI_UNSUPPORTED
;
715 phys_storage
= virt_to_phys_or_null(storage_space
);
716 phys_remaining
= virt_to_phys_or_null(remaining_space
);
717 phys_max
= virt_to_phys_or_null(max_variable_size
);
719 status
= efi_thunk(query_variable_info
, attr
, phys_storage
,
720 phys_remaining
, phys_max
);
726 efi_thunk_query_capsule_caps(efi_capsule_header_t
**capsules
,
727 unsigned long count
, u64
*max_size
,
731 * To properly support this function we would need to repackage
732 * 'capsules' because the firmware doesn't understand 64-bit
735 return EFI_UNSUPPORTED
;
738 void efi_thunk_runtime_setup(void)
740 efi
.get_time
= efi_thunk_get_time
;
741 efi
.set_time
= efi_thunk_set_time
;
742 efi
.get_wakeup_time
= efi_thunk_get_wakeup_time
;
743 efi
.set_wakeup_time
= efi_thunk_set_wakeup_time
;
744 efi
.get_variable
= efi_thunk_get_variable
;
745 efi
.get_next_variable
= efi_thunk_get_next_variable
;
746 efi
.set_variable
= efi_thunk_set_variable
;
747 efi
.get_next_high_mono_count
= efi_thunk_get_next_high_mono_count
;
748 efi
.reset_system
= efi_thunk_reset_system
;
749 efi
.query_variable_info
= efi_thunk_query_variable_info
;
750 efi
.update_capsule
= efi_thunk_update_capsule
;
751 efi
.query_capsule_caps
= efi_thunk_query_capsule_caps
;
753 #endif /* CONFIG_EFI_MIXED */