#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
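
/*
 * Upper bound on the number of bytes a single patch site may occupy;
 * the on-stack instruction buffers used while patching are sized from it.
 */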
#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int __initdata_or_module noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, ...)                               \
do {                                                    \
        if (debug_alternative)                          \
                printk(KERN_DEBUG fmt, ##__VA_ARGS__);  \
} while (0)

/*
 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
 * that correspond to that nop. Getting from one nop to the next, we
 * add to the array the offset that is equal to the sum of all sizes of
 * nops preceding the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
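/*
 * For example, intel_nops[3] below points at intelnops + 1 + 2, i.e. at
 * the first byte of GENERIC_NOP3, so ideal_nops[n] is always an n-byte nop.
 */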

#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
        GENERIC_NOP1,
        GENERIC_NOP2,
        GENERIC_NOP3,
        GENERIC_NOP4,
        GENERIC_NOP5,
        GENERIC_NOP6,
        GENERIC_NOP7,
        GENERIC_NOP8,
        GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
        NULL,
        intelnops,
        intelnops + 1,
        intelnops + 1 + 2,
        intelnops + 1 + 2 + 3,
        intelnops + 1 + 2 + 3 + 4,
        intelnops + 1 + 2 + 3 + 4 + 5,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
        K8_NOP1,
        K8_NOP2,
        K8_NOP3,
        K8_NOP4,
        K8_NOP5,
        K8_NOP6,
        K8_NOP7,
        K8_NOP8,
        K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
        K7_NOP1,
        K7_NOP2,
        K7_NOP3,
        K7_NOP4,
        K7_NOP5,
        K7_NOP6,
        K7_NOP7,
        K7_NOP8,
        K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
        NULL,
        k7nops,
        k7nops + 1,
        k7nops + 1 + 2,
        k7nops + 1 + 2 + 3,
        k7nops + 1 + 2 + 3 + 4,
        k7nops + 1 + 2 + 3 + 4 + 5,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
        P6_NOP1,
        P6_NOP2,
        P6_NOP3,
        P6_NOP4,
        P6_NOP5,
        P6_NOP6,
        P6_NOP7,
        P6_NOP8,
        P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
        NULL,
        p6nops,
        p6nops + 1,
        p6nops + 1 + 2,
        p6nops + 1 + 2 + 3,
        p6nops + 1 + 2 + 3 + 4,
        p6nops + 1 + 2 + 3 + 4 + 5,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif
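
/*
 * Pick the NOP table best suited to the boot CPU.  Called once during
 * early boot, after the boot CPU has been identified.
 */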
void __init arch_init_ideal_nops(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
                /*
                 * Due to a decoder implementation quirk, some
                 * specific Intel CPUs actually perform better with
                 * the "k8_nops" than with the SDM-recommended NOPs.
                 */
                if (boot_cpu_data.x86 == 6 &&
                    boot_cpu_data.x86_model >= 0x0f &&
                    boot_cpu_data.x86_model != 0x1c &&
                    boot_cpu_data.x86_model != 0x26 &&
                    boot_cpu_data.x86_model != 0x27 &&
                    boot_cpu_data.x86_model < 0x30) {
                        ideal_nops = k8_nops;
                } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
                        ideal_nops = p6_nops;
                } else {
#ifdef CONFIG_X86_64
                        ideal_nops = k8_nops;
#else
                        ideal_nops = intel_nops;
#endif
                }
                break;
        default:
#ifdef CONFIG_X86_64
                ideal_nops = k8_nops;
#else
                if (boot_cpu_has(X86_FEATURE_K8))
                        ideal_nops = k8_nops;
                else if (boot_cpu_has(X86_FEATURE_K7))
                        ideal_nops = k7_nops;
                else
                        ideal_nops = intel_nops;
#endif
        }
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
        while (len > 0) {
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
                memcpy(insns, ideal_nops[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
}
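
/*
 * Section boundaries provided by the linker script: the alternatives
 * table and the table of LOCK-prefix fixup offsets.
 */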
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void __init_or_module apply_alternatives(struct alt_instr *start,
                                         struct alt_instr *end)
{
        struct alt_instr *a;
        u8 *instr, *replacement;
        u8 insnbuf[MAX_PATCH_LEN];

        DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
        /*
         * The scan order should be from start to end. A later scanned
         * alternative code can overwrite previously scanned alternative code.
         * Some kernel functions (e.g. memcpy, memset, etc) use this order to
         * patch code.
         *
         * So be careful if you want to change the scan order to any other
         * order.
         */
        for (a = start; a < end; a++) {
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
                BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                BUG_ON(a->cpuid >= NCAPINTS * 32);
                if (!boot_cpu_has(a->cpuid))
                        continue;

                memcpy(insnbuf, replacement, a->replacementlen);

                /* 0xe8 is a relative call; fix the destination offset. */
                if (*insnbuf == 0xe8 && a->replacementlen == 5)
                        *(s32 *)(insnbuf + 1) += replacement - instr;

                /* Pad the remainder of the patch site with nops. */
                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);

                text_poke_early(instr, insnbuf, a->instrlen);
        }
}
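
/*
 * UP/SMP lock prefix patching: the .smp_locks section holds 32-bit
 * relative offsets to every LOCK prefix emitted via LOCK_PREFIX.  On a
 * uniprocessor machine the prefixes are rewritten to the harmless DS
 * segment override (0x3e); if more CPUs can come online they are turned
 * back into real LOCK prefixes (0xf0) by alternatives_enable_smp().
 */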
static void alternatives_smp_lock(const s32 *start, const s32 *end,
                                  u8 *text, u8 *text_end)
{
        const s32 *poff;

        mutex_lock(&text_mutex);
        for (poff = start; poff < end; poff++) {
                u8 *ptr = (u8 *)poff + *poff;

                if (!*poff || ptr < text || ptr >= text_end)
                        continue;
                /* turn DS segment override prefix into lock prefix */
                if (*ptr == 0x3e)
                        text_poke(ptr, ((unsigned char []){0xf0}), 1);
        }
        mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
                                    u8 *text, u8 *text_end)
{
        const s32 *poff;

        mutex_lock(&text_mutex);
        for (poff = start; poff < end; poff++) {
                u8 *ptr = (u8 *)poff + *poff;

                if (!*poff || ptr < text || ptr >= text_end)
                        continue;
                /* turn lock prefix into DS segment override prefix */
                if (*ptr == 0xf0)
                        text_poke(ptr, ((unsigned char []){0x3E}), 1);
        }
        mutex_unlock(&text_mutex);
}

struct smp_alt_module {
        /* owning module (NULL for the core kernel) and its name */
        struct module   *mod;
        char            *name;

        /* ptrs to lock prefixes */
        const s32       *locks;
        const s32       *locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8              *text;
        u8              *text_end;

        struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static bool uniproc_patched = false;    /* protected by smp_alt */
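
/*
 * Remember a module's lock-prefix sites so they can be switched back to
 * real LOCK prefixes by alternatives_enable_smp() later; while the system
 * is still uniprocessor the sites are immediately patched to the UP form.
 */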
void __init_or_module alternatives_smp_module_add(struct module *mod,
                                                  char *name,
                                                  void *locks, void *locks_end,
                                                  void *text,  void *text_end)
{
        struct smp_alt_module *smp;

        mutex_lock(&smp_alt);
        if (!uniproc_patched)
                goto unlock;

        if (num_possible_cpus() == 1)
                /* Don't bother remembering, we'll never have to undo it. */
                goto smp_unlock;

        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (NULL == smp)
                /* we'll run the (safe but slow) SMP code then ... */
                goto unlock;

        smp->mod        = mod;
        smp->name       = name;
        smp->locks      = locks;
        smp->locks_end  = locks_end;
        smp->text       = text;
        smp->text_end   = text_end;
        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
                __func__, smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);

        list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
        alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
        mutex_unlock(&smp_alt);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
        struct smp_alt_module *item;

        mutex_lock(&smp_alt);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
                kfree(item);
                break;
        }
        mutex_unlock(&smp_alt);
}

void alternatives_enable_smp(void)
{
        struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
        /*
         * An older binutils section handling bug prevented
         * alternatives-replacement from working reliably.
         *
         * If this still occurs then you should see a hang
         * or crash shortly after this line:
         */
        pr_info("lockdep: fixing up alternatives\n");
#endif

        /* Why bother if there are no other CPUs? */
        BUG_ON(num_possible_cpus() == 1);

        mutex_lock(&smp_alt);

        if (uniproc_patched) {
                pr_info("switching to SMP code\n");
                BUG_ON(num_online_cpus() != 1);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
                uniproc_patched = false;
        }
        mutex_unlock(&smp_alt);
}

/* Return 1 if the address range is reserved for smp-alternatives */
int alternatives_text_reserved(void *start, void *end)
{
        struct smp_alt_module *mod;
        const s32 *poff;
        u8 *text_start = start;
        u8 *text_end = end;

        list_for_each_entry(mod, &smp_alt_modules, next) {
                if (mod->text > text_end || mod->text_end < text_start)
                        continue;
                for (poff = mod->locks; poff < mod->locks_end; poff++) {
                        const u8 *ptr = (const u8 *)poff + *poff;

                        if (text_start <= ptr && text_end > ptr)
                                return 1;
                }
        }

        return 0;
}

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
                                     struct paravirt_patch_site *end)
{
        struct paravirt_patch_site *p;
        char insnbuf[MAX_PATCH_LEN];

        if (noreplace_paravirt)
                return;

        for (p = start; p < end; p++) {
                unsigned int used;

                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
                                         (unsigned long)p->instr, p->len);

                BUG_ON(used > p->len);

                /* Pad the rest with nops */
                add_nops(insnbuf + used, p->len - used);
                text_poke_early(p->instr, insnbuf, p->len);
        }
}
extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
#endif  /* CONFIG_PARAVIRT */
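
/*
 * alternative_instructions() runs once at boot, from check_bugs(), after
 * CPU feature detection has finished and before other CPUs are started.
 */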
void __init alternative_instructions(void)
{
        /* The patching is not fully atomic, so try to avoid local
           interruptions that might execute the to-be-patched code.
           Other CPUs are not running. */
        stop_nmi();

        /*
         * Don't stop machine check exceptions while patching.
         * MCEs only happen when something got corrupted and in this
         * case we must do something about the corruption.
         * Ignoring it is worse than an unlikely patching race.
         * Also machine checks tend to be broadcast and if one CPU
         * goes into machine check the others follow quickly, so we don't
         * expect a machine check to cause undue problems during
         * code patching.
         */

        apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
        /* Patch to UP if other CPUs are not imminent. */
        if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
                uniproc_patched = true;
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);
        }

        if (!uniproc_patched || num_possible_cpus() == 1)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);
#endif

        apply_paravirt(__parainstructions, __parainstructions_end);

        restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void *__init_or_module text_poke_early(void *addr, const void *opcode,
                                       size_t len)
{
        unsigned long flags;

        local_irq_save(flags);
        memcpy(addr, opcode, len);
        sync_core();
        local_irq_restore(flags);
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        return addr;
}
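
/*
 * Unlike text_poke_early(), text_poke() below writes through a temporary
 * fixmap alias of the target page, so it keeps working once the kernel
 * text has been mapped read-only (e.g. with DEBUG_RODATA).
 */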

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write. It also makes sure we fit
 * on a single page.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
        unsigned long flags;
        char *vaddr;
        struct page *pages[2];
        int i;

        if (!core_kernel_text((unsigned long)addr)) {
                pages[0] = vmalloc_to_page(addr);
                pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
        } else {
                pages[0] = virt_to_page(addr);
                WARN_ON(!PageReserved(pages[0]));
                pages[1] = virt_to_page(addr + PAGE_SIZE);
        }
        BUG_ON(!pages[0]);
        local_irq_save(flags);
        set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
        if (pages[1])
                set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
        vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
        memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
        clear_fixmap(FIX_TEXT_POKE0);
        if (pages[1])
                clear_fixmap(FIX_TEXT_POKE1);
        local_flush_tlb();
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        for (i = 0; i < len; i++)
                BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
        local_irq_restore(flags);
        return addr;
}

/*
 * Cross-modifying kernel text with stop_machine().
 * This code originally comes from immediate value.
 */
static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
        struct text_poke_param *params;
        int nparams;
};
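
/*
 * Rendezvous run on every CPU by stop_machine(): the first CPU to arrive
 * applies all the patches with text_poke(), the others spin until it is
 * done, then every CPU flushes its icache and serializes with cpuid.
 */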
static int __kprobes stop_machine_text_poke(void *data)
{
        struct text_poke_params *tpp = data;
        struct text_poke_param *p;
        int i;

        if (atomic_xchg(&stop_machine_first, 0)) {
                for (i = 0; i < tpp->nparams; i++) {
                        p = &tpp->params[i];
                        text_poke(p->addr, p->opcode, p->len);
                }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
                while (!wrote_text)
                        cpu_relax();
                smp_mb();       /* Load wrote_text before following execution */
        }

        for (i = 0; i < tpp->nparams; i++) {
                p = &tpp->params[i];
                flush_icache_range((unsigned long)p->addr,
                                   (unsigned long)p->addr + p->len);
        }
        /*
         * Intel Architecture Software Developer's Manual section 7.1.3
         * specifies that a core serializing instruction such as "cpuid"
         * should be executed on _each_ core before the new instruction is
         * made visible.
         */
        sync_core();
        return 0;
}

/**
 * text_poke_smp - Update instructions on a live kernel on SMP
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Modify multi-byte instructions by using stop_machine() on SMP. This allows
 * a user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
 * should be allowed, since stop_machine() does _not_ protect code against
 * NMI and MCE handlers.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
        struct text_poke_params tpp;
        struct text_poke_param p;

        p.addr = addr;
        p.opcode = opcode;
        p.len = len;
        tpp.params = &p;
        tpp.nparams = 1;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        /* Use __stop_machine() because the caller already got online_cpus. */
        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
        return addr;
}

/**
 * text_poke_smp_batch - Update instructions on a live kernel on SMP
 * @params: an array of text_poke parameters
 * @n: the number of elements in params.
 *
 * Modify multi-byte instructions by using stop_machine() on SMP. Since
 * stop_machine() is a heavy task, it is better to aggregate text_poke
 * requests and do them all in one stop_machine() invocation if possible.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
{
        struct text_poke_params tpp = {.params = params, .nparams = n};

        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
}
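
/*
 * Illustrative use of the batch interface (a sketch only; NR_SITES and the
 * params[] array are hypothetical, not part of this file).  A caller such
 * as the kprobes jump optimizer would do roughly:
 *
 *      struct text_poke_param params[NR_SITES];
 *
 *      ... fill in params[i].addr, params[i].opcode, params[i].len ...
 *      get_online_cpus();
 *      mutex_lock(&text_mutex);
 *      text_poke_smp_batch(params, NR_SITES);
 *      mutex_unlock(&text_mutex);
 *      put_online_cpus();
 */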