// SPDX-License-Identifier: GPL-2.0-or-later
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
#include <linux/kprobes.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
#include <asm/io_bitmap.h>

/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     "ret\n\t"
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");

void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

struct branch {
        unsigned char opcode;
        u32 delta;
} __attribute__((packed));
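
/*
 * Note: struct branch mirrors the 5-byte x86 near call/jmp encoding used
 * below: one opcode byte (0xe8 for call, 0xe9 for jmp) followed by a
 * 32-bit displacement, which is why the patch routines require at least
 * 5 bytes at the patch site.
 */
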
static unsigned paravirt_patch_call(void *insn_buff, const void *target,
                                    unsigned long addr, unsigned len)
{
        const int call_len = 5;
        struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr + call_len);

        if (len < call_len) {
                pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
                /* Kernel might not be viable if patching fails, bail out: */
                BUG_ON(1);
        }

        b->opcode = 0xe8; /* call */
        b->delta = delta;
        BUILD_BUG_ON(sizeof(*b) != call_len);

        return call_len;
}
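
/*
 * For reference: the emitted instruction is a direct near call,
 * e8 <le32 delta>, where the delta is measured from the end of the
 * 5-byte instruction, i.e. target - (addr + 5) as computed above.
 */
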
#ifdef CONFIG_PARAVIRT_XXL
/* identity function, which can be inlined */
u64 notrace _paravirt_ident_64(u64 x)
{
        return x;
}

static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
                                   unsigned long addr, unsigned len)
{
        struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr + 5);

        if (len < 5) {
#ifdef CONFIG_RETPOLINE
                WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
                return len;     /* call too long for patch site */
        }

        b->opcode = 0xe9;       /* jmp */
        b->delta = delta;

        return 5;
}
#endif

DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);

void __init native_pv_lock_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
}
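
/*
 * Note: the key defaults to true so that, until the check above has run,
 * queued spinlocks fall back to the virt_spin_lock() test-and-set path;
 * on bare metal it is disabled here and the native slow path is kept.
 */
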
unsigned paravirt_patch_default(u8 type, void *insn_buff,
                                unsigned long addr, unsigned len)
{
        /*
         * Neat trick to map patch type back to the call within the
         * corresponding structure.
         */
        void *opfunc = *((void **)&pv_ops + type);
        unsigned ret;
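
        /*
         * (type is the pointer-sized word offset of the op inside
         * struct paravirt_patch_template, which is what PARAVIRT_PATCH()
         * evaluates to, so &pv_ops viewed as an array of void * pointers
         * can be indexed by it directly.)
         */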
        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
                ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a + sizeof(ud2a));
        else if (opfunc == _paravirt_nop)
                ret = 0;

#ifdef CONFIG_PARAVIRT_XXL
        /* identity functions just return their single argument */
        else if (opfunc == _paravirt_ident_64)
                ret = paravirt_patch_ident_64(insn_buff, len);

        else if (type == PARAVIRT_PATCH(cpu.iret) ||
                 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insn_buff, opfunc, addr, len);

        return ret;
}

unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        /* Alternative instruction is too large for the patch site and we cannot continue: */
        BUG_ON(insn_len > len || start == NULL);

        memcpy(insn_buff, start, insn_len);

        return insn_len;
}

static void native_flush_tlb(void)
{
        __native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 */
static void native_flush_tlb_global(void)
{
        __native_flush_tlb_global();
}

static void native_flush_tlb_one_user(unsigned long addr)
{
        __native_flush_tlb_one_user(addr);
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

static u64 native_steal_clock(int cpu)
{
        return 0;
}

/* These are in entry.S */
extern void native_iret(void);
extern void native_usergs_sysret64(void);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware. This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        return request_resource(&ioport_resource, &reserve_ioports);
}

static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
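
/*
 * A CPU is either in PARAVIRT_LAZY_NONE or in exactly one lazy mode at a
 * time; enter_lazy()/leave_lazy() below enforce this with BUG_ON() rather
 * than allowing lazy modes to nest.
 */
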
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

        this_cpu_write(paravirt_lazy_mode, mode);
}

static void leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

        this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_flush_lazy_mmu(void)
{
        preempt_disable();

        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }

        preempt_enable();
}
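
/*
 * Note: "flushing" is implemented above as a leave/re-enter pair, which
 * forces any batched MMU updates to be issued while keeping the calling
 * context in lazy MMU mode afterwards.
 */
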
#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_end_context_switch(struct task_struct *next)
{
        BUG_ON(preemptible());

        leave_lazy(PARAVIRT_LAZY_CPU);

        if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
                arch_enter_lazy_mmu_mode();
}
#endif

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        if (in_interrupt())
                return PARAVIRT_LAZY_NONE;

        return this_cpu_read(paravirt_lazy_mode);
}

struct pv_info pv_info = {
        .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
        .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */

#ifdef CONFIG_X86_64
        .extra_user_64bit_cs = __USER_CS,
#endif
#endif
};

/* 64-bit pagetable entries */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
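
/*
 * On bare hardware the pte/pgd value conversions are identity operations,
 * so the ops below point at _paravirt_ident_64(); paravirt_patch_default()
 * recognizes that and patches such call sites down to a trivial register
 * move at boot.
 */
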
struct paravirt_patch_template pv_ops = {
        .init.patch = native_patch,

        .time.sched_clock = native_sched_clock,
        .time.steal_clock = native_steal_clock,

        .cpu.io_delay = native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
        .cpu.cpuid = native_cpuid,
        .cpu.get_debugreg = native_get_debugreg,
        .cpu.set_debugreg = native_set_debugreg,
        .cpu.read_cr0 = native_read_cr0,
        .cpu.write_cr0 = native_write_cr0,
        .cpu.write_cr4 = native_write_cr4,
        .cpu.wbinvd = native_wbinvd,
        .cpu.read_msr = native_read_msr,
        .cpu.write_msr = native_write_msr,
        .cpu.read_msr_safe = native_read_msr_safe,
        .cpu.write_msr_safe = native_write_msr_safe,
        .cpu.read_pmc = native_read_pmc,
        .cpu.load_tr_desc = native_load_tr_desc,
        .cpu.set_ldt = native_set_ldt,
        .cpu.load_gdt = native_load_gdt,
        .cpu.load_idt = native_load_idt,
        .cpu.store_tr = native_store_tr,
        .cpu.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
        .cpu.load_gs_index = native_load_gs_index,
#endif
        .cpu.write_ldt_entry = native_write_ldt_entry,
        .cpu.write_gdt_entry = native_write_gdt_entry,
        .cpu.write_idt_entry = native_write_idt_entry,

        .cpu.alloc_ldt = paravirt_nop,
        .cpu.free_ldt = paravirt_nop,

        .cpu.load_sp0 = native_load_sp0,

#ifdef CONFIG_X86_64
        .cpu.usergs_sysret64 = native_usergs_sysret64,
#endif
        .cpu.iret = native_iret,
        .cpu.swapgs = native_swapgs,

#ifdef CONFIG_X86_IOPL_IOPERM
        .cpu.update_io_bitmap = native_tss_update_io_bitmap,
#endif

        .cpu.start_context_switch = paravirt_nop,
        .cpu.end_context_switch = paravirt_nop,

        .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
        .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
        .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
        .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
        .irq.safe_halt = native_safe_halt,
        .irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

        .mmu.flush_tlb_user = native_flush_tlb,
        .mmu.flush_tlb_kernel = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_others = native_flush_tlb_others,
        .mmu.tlb_remove_table = (void (*)(struct mmu_gather *, void *))tlb_remove_page,
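        /*
         * Note: on bare metal a page-table page is freed like any other
         * page; the cast only adapts tlb_remove_page()'s struct page *
         * argument to the void * type of this hook.
         */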

        .mmu.exit_mmap = paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
        .mmu.write_cr2 = native_write_cr2,
        .mmu.read_cr3 = __native_read_cr3,
        .mmu.write_cr3 = native_write_cr3,

        .mmu.pgd_alloc = __paravirt_pgd_alloc,
        .mmu.pgd_free = paravirt_nop,

        .mmu.alloc_pte = paravirt_nop,
        .mmu.alloc_pmd = paravirt_nop,
        .mmu.alloc_pud = paravirt_nop,
        .mmu.alloc_p4d = paravirt_nop,
        .mmu.release_pte = paravirt_nop,
        .mmu.release_pmd = paravirt_nop,
        .mmu.release_pud = paravirt_nop,
        .mmu.release_p4d = paravirt_nop,

        .mmu.set_pte = native_set_pte,
        .mmu.set_pte_at = native_set_pte_at,
        .mmu.set_pmd = native_set_pmd,

        .mmu.ptep_modify_prot_start = __ptep_modify_prot_start,
        .mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit,

#if CONFIG_PGTABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
        .mmu.set_pte_atomic = native_set_pte_atomic,
        .mmu.pte_clear = native_pte_clear,
        .mmu.pmd_clear = native_pmd_clear,
#endif
        .mmu.set_pud = native_set_pud,

        .mmu.pmd_val = PTE_IDENT,
        .mmu.make_pmd = PTE_IDENT,

#if CONFIG_PGTABLE_LEVELS >= 4
        .mmu.pud_val = PTE_IDENT,
        .mmu.make_pud = PTE_IDENT,

        .mmu.set_p4d = native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
        .mmu.p4d_val = PTE_IDENT,
        .mmu.make_p4d = PTE_IDENT,

        .mmu.set_pgd = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */

        .mmu.pte_val = PTE_IDENT,
        .mmu.pgd_val = PTE_IDENT,

        .mmu.make_pte = PTE_IDENT,
        .mmu.make_pgd = PTE_IDENT,

        .mmu.dup_mmap = paravirt_nop,
        .mmu.activate_mm = paravirt_nop,

        .mmu.lazy_mode = {
                .enter = paravirt_nop,
                .leave = paravirt_nop,
                .flush = paravirt_nop,
        },

        .mmu.set_fixmap = native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
        .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
        .lock.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
        .lock.wait = paravirt_nop,
        .lock.kick = paravirt_nop,
        .lock.vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif
};

#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#endif

EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);