// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
#include <linux/pgtable.h>
#include <linux/sched/hotplug.h>
#include <asm/apic.h>
#include <asm/barrier.h>
#include <asm/init.h>
#include <asm/intel_pt.h>
#include <asm/nmi.h>
#include <asm/processor.h>
#include <asm/reboot.h>

/* Physical address of the Multiprocessor Wakeup Structure mailbox */
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;

/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;

static u64 acpi_mp_pgd __ro_after_init;
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
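
/*
 * asm_acpi_mp_play_dead() switches to the identity-mapped page tables at
 * acpi_mp_pgd and hands the CPU back to the firmware via the reset vector;
 * it does not return. Both CPU offline paths below end up in it.
 */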
static void acpi_mp_stop_this_cpu(void)
{
	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
}

static void acpi_mp_play_dead(void)
{
	play_dead_common();
	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
}

static void acpi_mp_cpu_die(unsigned int cpu)
{
	u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
	unsigned long timeout;

	/*
	 * Use the TEST mailbox command to prove that the BIOS got control
	 * over the CPU before declaring it dead.
	 *
	 * The BIOS has to clear the 'command' field of the mailbox.
	 */
	acpi_mp_wake_mailbox->apic_id = apicid;
	smp_store_release(&acpi_mp_wake_mailbox->command,
			  ACPI_MP_WAKE_COMMAND_TEST);

	/* Don't wait longer than a second. */
	timeout = USEC_PER_SEC;
	while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
		udelay(1);

	if (!timeout)
		pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
}

/* The argument is required to match the type of x86_mapping_info::alloc_pgt_page */
static void __init *alloc_pgt_page(void *dummy)
{
	return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
}

static void __init free_pgt_page(void *pgt, void *dummy)
{
	return memblock_free(pgt, PAGE_SIZE);
}

/*
 * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
 * the same place as in the kernel page tables. asm_acpi_mp_play_dead()
 * switches to the identity mapping and the function has to be present at
 * the same spot in the virtual address space before and after switching
 * page tables.
 */
static int __init init_transition_pgtable(pgd_t *pgd)
{
	pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
	unsigned long vaddr, paddr;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	vaddr = (unsigned long)asm_acpi_mp_play_dead;
	pgd += pgd_index(vaddr);
	if (!pgd_present(*pgd)) {
		p4d = (p4d_t *)alloc_pgt_page(NULL);
		if (!p4d)
			return -ENOMEM;
		set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
	}
	p4d = p4d_offset(pgd, vaddr);
	if (!p4d_present(*p4d)) {
		pud = (pud_t *)alloc_pgt_page(NULL);
		if (!pud)
			return -ENOMEM;
		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
	}
	pud = pud_offset(p4d, vaddr);
	if (!pud_present(*pud)) {
		pmd = (pmd_t *)alloc_pgt_page(NULL);
		if (!pmd)
			return -ENOMEM;
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}
	pmd = pmd_offset(pud, vaddr);
	if (!pmd_present(*pmd)) {
		pte = (pte_t *)alloc_pgt_page(NULL);
		if (!pte)
			return -ENOMEM;
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
	}
	pte = pte_offset_kernel(pmd, vaddr);

	/* Point the PTE at the physical address of asm_acpi_mp_play_dead(). */
	paddr = slow_virt_to_phys((void *)vaddr);
	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));

	return 0;
}
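
/*
 * Build the page tables for the CPU offline path: identity-map everything
 * the kernel has mapped plus the page containing the reset vector, and map
 * asm_acpi_mp_play_dead() at its kernel virtual address so that switching
 * CR3 inside it is safe.
 */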
static int __init acpi_mp_setup_reset(u64 reset_vector)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page = alloc_pgt_page,
		.free_pgt_page	= free_pgt_page,
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
		.kernpg_flag	= _KERNPG_TABLE_NOENC,
	};
	pgd_t *pgd;

	pgd = alloc_pgt_page(NULL);
	if (!pgd)
		return -ENOMEM;

	for (int i = 0; i < nr_pfn_mapped; i++) {
		unsigned long mstart, mend;

		mstart = pfn_mapped[i].start << PAGE_SHIFT;
		mend = pfn_mapped[i].end << PAGE_SHIFT;
		if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
			kernel_ident_mapping_free(&info, pgd);
			return -ENOMEM;
		}
	}

	if (kernel_ident_mapping_init(&info, pgd,
				      PAGE_ALIGN_DOWN(reset_vector),
				      PAGE_ALIGN(reset_vector + 1))) {
		kernel_ident_mapping_free(&info, pgd);
		return -ENOMEM;
	}

	if (init_transition_pgtable(pgd)) {
		kernel_ident_mapping_free(&info, pgd);
		return -ENOMEM;
	}

	smp_ops.play_dead = acpi_mp_play_dead;
	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
	smp_ops.cpu_die = acpi_mp_cpu_die;

	acpi_mp_reset_vector_paddr = reset_vector;
	acpi_mp_pgd = __pa(pgd);

	return 0;
}
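
/*
 * Wake up a secondary CPU through the ACPI MADT wakeup mailbox. Installed
 * as the wakeup_secondary_cpu_64 APIC callback by acpi_parse_mp_wake().
 */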
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
{
	if (!acpi_mp_wake_mailbox_paddr) {
		pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
	 *
	 * Wakeup of secondary CPUs is fully serialized in the core code.
	 * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
	 */
	if (!acpi_mp_wake_mailbox) {
		acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
						sizeof(*acpi_mp_wake_mailbox),
						MEMREMAP_WB);
	}

	/*
	 * Mailbox memory is shared between the firmware and OS. Firmware will
	 * listen on the mailbox command address, and once it receives the
	 * wakeup command, the CPU associated with the given apicid will be
	 * booted.
	 *
	 * The value of 'apic_id' and 'wakeup_vector' must be visible to the
	 * firmware before the wakeup command is visible. smp_store_release()
	 * ensures ordering and visibility.
	 */
	acpi_mp_wake_mailbox->apic_id	    = apicid;
	acpi_mp_wake_mailbox->wakeup_vector = start_ip;
	smp_store_release(&acpi_mp_wake_mailbox->command,
			  ACPI_MP_WAKE_COMMAND_WAKEUP);

	/*
	 * Wait for the CPU to wake up.
	 *
	 * The CPU being woken up is essentially in a spin loop waiting to be
	 * woken up. It should not take long for it to wake up and acknowledge
	 * by zeroing out ->command.
	 *
	 * The ACPI specification doesn't provide any guidance on how long the
	 * kernel has to wait for a wake up acknowledgment. It also doesn't
	 * provide a way to cancel a wake up request if it takes too long.
	 *
	 * In a TDX environment, the VMM has control over how long it takes to
	 * wake up a secondary. It can postpone scheduling the secondary vCPU
	 * indefinitely. Giving up on the wake up request and reporting an
	 * error opens a possible attack vector for the VMM: it can wake up a
	 * secondary CPU when the kernel doesn't expect it. Wait until a
	 * positive result of the wake up request.
	 */
	while (READ_ONCE(acpi_mp_wake_mailbox->command))
		cpu_relax();

	return 0;
}
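
/* Forbid CPU offlining and hide the mailbox from a future kexec kernel. */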
static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
{
	cpu_hotplug_disable_offlining();

	/*
	 * ACPI MADT doesn't allow offlining a CPU after it was onlined. This
	 * limits kexec: the second kernel won't be able to use more than one
	 * CPU.
	 *
	 * To prevent a kexec kernel from onlining secondary CPUs, invalidate
	 * the mailbox address in the ACPI MADT wakeup structure, which
	 * prevents a kexec kernel from using it.
	 *
	 * This is safe as the booting kernel has the mailbox address cached
	 * already and acpi_wakeup_cpu() uses the cached value to bring up the
	 * secondary CPUs.
	 *
	 * Note: This is a Linux specific convention and not covered by the
	 *	 ACPI specification.
	 */
	mp_wake->mailbox_address = 0;
}
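
/*
 * Parse the MP Wakeup entry of the MADT: cache the mailbox address and, for
 * v1 entries, set up the reset vector so that secondary CPUs can be offlined.
 */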
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
			      const unsigned long end)
{
	struct acpi_madt_multiproc_wakeup *mp_wake;

	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;

	/*
	 * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
	 * entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
	 * than the actual size of the MP wakeup entry in the ACPI table because
	 * the 'reset_vector' is only available in the V1 MP wakeup structure.
	 */
	if (!mp_wake)
		return -EINVAL;
	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
		return -EINVAL;
	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
		return -EINVAL;

	acpi_table_print_madt_entry(&header->common);

	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;

	if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
	    mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
		if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
			pr_warn("Failed to setup MADT reset vector\n");
			acpi_mp_disable_offlining(mp_wake);
		}
	} else {
		/*
		 * CPU offlining requires version 1 of the ACPI MADT wakeup
		 * structure.
		 */
		acpi_mp_disable_offlining(mp_wake);
	}

	apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);

	return 0;
}