2 * Kernel Probes Jump Optimization (Optprobes)
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * Copyright (C) IBM Corporation, 2002, 2004
19 * Copyright (C) Hitachi Ltd., 2012
21 #include <linux/kprobes.h>
22 #include <linux/ptrace.h>
23 #include <linux/string.h>
24 #include <linux/slab.h>
25 #include <linux/hardirq.h>
26 #include <linux/preempt.h>
27 #include <linux/extable.h>
28 #include <linux/kdebug.h>
29 #include <linux/kallsyms.h>
30 #include <linux/ftrace.h>
31 #include <linux/frame.h>
33 #include <asm/text-patching.h>
34 #include <asm/cacheflush.h>
36 #include <asm/pgtable.h>
37 #include <linux/uaccess.h>
38 #include <asm/alternative.h>
40 #include <asm/debugreg.h>
41 #include <asm/set_memory.h>
42 #include <asm/sections.h>
43 #include <asm/nospec-branch.h>
/*
 * Fill @buf with the instruction bytes found at @addr and return @buf cast
 * to unsigned long.  When @addr lies on, or within RELATIVEJUMP_SIZE - 1
 * bytes after, a jump-optimized kprobe, the bytes saved in
 * op->optinsn.copied_insn are copied over the bytes read from memory.
 *
 * NOTE(review): several lines of this function (local declarations, branch
 * bodies, the non-optimized return path) are not visible in this excerpt;
 * only the visible statements are described.
 */
47 unsigned long __recover_optprobed_insn(kprobe_opcode_t
*buf
, unsigned long addr
)
49 struct optimized_kprobe
*op
;
/* Search for a kprobe registered at @addr or up to RELATIVEJUMP_SIZE - 1 bytes before it. */
54 for (i
= 0; i
< RELATIVEJUMP_SIZE
; i
++) {
55 kp
= get_kprobe((void *)addr
- i
);
56 /* This function only handles jump-optimized kprobe */
57 if (kp
&& kprobe_optimized(kp
)) {
58 op
= container_of(kp
, struct optimized_kprobe
, kp
);
59 /* If op->list is not empty, op is under optimizing */
60 if (list_empty(&op
->list
))
68 * If the kprobe can be optimized, original bytes which can be
69 * overwritten by jump destination address. In this case, original
70 * bytes must be recovered from op->optinsn.copied_insn buffer.
/* Read MAX_INSN_SIZE opcodes starting at @addr into @buf. */
72 if (probe_kernel_read(buf
, (void *)addr
,
73 MAX_INSN_SIZE
* sizeof(kprobe_opcode_t
)))
/* @addr is the probe address itself: buf[0] keeps what was read, the next RELATIVE_ADDR_SIZE bytes come from the backup. */
76 if (addr
== (unsigned long)kp
->addr
) {
78 memcpy(buf
+ 1, op
->optinsn
.copied_insn
, RELATIVE_ADDR_SIZE
);
/* offs is the distance of @addr from kp->addr minus one, i.e. its index into copied_insn. */
80 offs
= addr
- (unsigned long)kp
->addr
- 1;
81 memcpy(buf
, op
->optinsn
.copied_insn
+ offs
, RELATIVE_ADDR_SIZE
- offs
);
84 return (unsigned long)buf
;
/*
 * @addr: location inside the instruction buffer being built
 * @val:  pointer-sized value to embed
 *
 * NOTE(review): only the final store of @val through @addr is visible in
 * this excerpt; any opcode bytes written before it are not shown.
 */
87 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
88 static void synthesize_set_arg1(kprobe_opcode_t
*addr
, unsigned long val
)
/* Write @val as an unsigned long through @addr. */
96 *(unsigned long *)addr
= val
;
/*
 * Assembly template for the out-of-line detour code.  It defines the
 * global labels optprobe_template_entry, optprobe_template_val,
 * optprobe_template_call and optprobe_template_end; the TMPL_*_IDX macros
 * express the latter three as offsets from optprobe_template_entry.
 * optprobe_template_val and optprobe_template_call are defined once before
 * and once after the "#else" (CONFIG_X86_32) marker.
 *
 * NOTE(review): most of the instruction strings of the template are not
 * visible in this excerpt.
 */
100 "optprobe_template_func:\n"
101 ".global optprobe_template_entry\n"
102 "optprobe_template_entry:\n"
104 /* We don't bother saving the ss register */
109 ".global optprobe_template_val\n"
110 "optprobe_template_val:\n"
113 ".global optprobe_template_call\n"
114 "optprobe_template_call:\n"
116 /* Move flags to rsp */
117 " movq 144(%rsp), %rdx\n"
118 " movq %rdx, 152(%rsp)\n"
120 /* Skip flags entry */
123 #else /* CONFIG_X86_32 */
127 ".global optprobe_template_val\n"
128 "optprobe_template_val:\n"
130 ".global optprobe_template_call\n"
131 "optprobe_template_call:\n"
134 " addl $4, %esp\n" /* skip cs */
137 ".global optprobe_template_end\n"
138 "optprobe_template_end:\n"
139 ".type optprobe_template_func, @function\n"
140 ".size optprobe_template_func, .-optprobe_template_func\n");
/* C-visible declaration of the assembly symbol defined above. */
142 void optprobe_template_func(void);
143 STACK_FRAME_NON_STANDARD(optprobe_template_func
);
/* Byte offset of optprobe_template_val from optprobe_template_entry. */
145 #define TMPL_MOVE_IDX \
146 ((long)optprobe_template_val - (long)optprobe_template_entry)
/* Byte offset of optprobe_template_call from optprobe_template_entry. */
147 #define TMPL_CALL_IDX \
148 ((long)optprobe_template_call - (long)optprobe_template_entry)
/* Byte offset of optprobe_template_end from optprobe_template_entry, i.e. the template length. */
149 #define TMPL_END_IDX \
150 ((long)optprobe_template_end - (long)optprobe_template_entry)
/* Size of one kprobe_opcode_t, used as the size of the breakpoint instruction. */
152 #define INT3_SIZE sizeof(kprobe_opcode_t)
/*
 * @op:   the optimized kprobe that was hit
 * @regs: register snapshot handed to the pre-handler
 *
 * Visible behaviour: if kprobe_running() is true the hit is only counted
 * with kprobes_inc_nmissed_count().  The remaining visible statements fill
 * in regs->cs, regs->ip and regs->orig_ax, publish op->kp as
 * current_kprobe, mark the control block KPROBE_HIT_ACTIVE, call
 * opt_pre_handler() and clear current_kprobe again.
 * preempt_enable_no_resched() is called near the end.
 *
 * NOTE(review): the return type, braces, the body of the
 * kprobe_disabled() branch and the else/preprocessor lines are not visible
 * in this excerpt.
 */
154 /* Optimized kprobe call back function: called from optinsn */
156 optimized_callback(struct optimized_kprobe
*op
, struct pt_regs
*regs
)
158 /* This is possible if op is under delayed unoptimizing */
159 if (kprobe_disabled(&op
->kp
))
163 if (kprobe_running()) {
164 kprobes_inc_nmissed_count(&op
->kp
);
166 struct kprobe_ctlblk
*kcb
= get_kprobe_ctlblk();
167 /* Save skipped registers */
/* NOTE(review): the two cs assignments below are presumably alternatives under a preprocessor conditional not shown here. */
169 regs
->cs
= __KERNEL_CS
;
171 regs
->cs
= __KERNEL_CS
| get_kernel_rpl();
/* ip is set to the probe address plus INT3_SIZE (one kprobe_opcode_t). */
174 regs
->ip
= (unsigned long)op
->kp
.addr
+ INT3_SIZE
;
175 regs
->orig_ax
= ~0UL;
/* Publish op->kp as the current kprobe for the duration of the pre-handler call. */
177 __this_cpu_write(current_kprobe
, &op
->kp
);
178 kcb
->kprobe_status
= KPROBE_HIT_ACTIVE
;
179 opt_pre_handler(&op
->kp
, regs
);
180 __this_cpu_write(current_kprobe
, NULL
);
182 preempt_enable_no_resched();
184 NOKPROBE_SYMBOL(optimized_callback
);
186 static int copy_optimized_instructions(u8
*dest
, u8
*src
, u8
*real
)
191 while (len
< RELATIVEJUMP_SIZE
) {
192 ret
= __copy_instruction(dest
+ len
, src
+ len
, real
, &insn
);
193 if (!ret
|| !can_boost(&insn
, src
+ len
))
197 /* Check whether the address range is reserved */
198 if (ftrace_text_reserved(src
, src
+ len
- 1) ||
199 alternatives_text_reserved(src
, src
+ len
- 1) ||
200 jump_label_text_reserved(src
, src
+ len
- 1))
/*
 * @insn: decoded instruction to classify
 *
 * Returns non-zero when the first opcode byte is 0xff and the ModRM reg
 * field masked with 6 equals 4 (reg field 4 or 5), or when the first
 * opcode byte is 0xea.
 */
206 /* Check whether insn is indirect jump */
207 static int __insn_is_indirect_jump(struct insn
*insn
)
209 return ((insn
->opcode
.bytes
[0] == 0xff &&
210 (X86_MODRM_REG(insn
->modrm
.value
) & 6) == 4) || /* Jump */
211 insn
->opcode
.bytes
[0] == 0xea); /* Segment based jump */
/*
 * @insn:  decoded instruction; next_byte and immediate.value are used to
 *         compute the jump target
 * @start: first address of the range
 * @len:   length of the range; the final comparison accepts
 *         start + len itself as well (both bounds are inclusive)
 *
 * The visible cases cover loopne/loope/loop/jcxz, near and short relative
 * jumps, and near/short conditional jumps.
 *
 * NOTE(review): the break/return statements and any default case of the
 * switch are not visible in this excerpt, so the result for other opcodes
 * cannot be confirmed from here.
 */
214 /* Check whether insn jumps into specified address range */
215 static int insn_jump_into_range(struct insn
*insn
, unsigned long start
, int len
)
217 unsigned long target
= 0;
219 switch (insn
->opcode
.bytes
[0]) {
220 case 0xe0: /* loopne */
221 case 0xe1: /* loope */
222 case 0xe2: /* loop */
223 case 0xe3: /* jcxz */
224 case 0xe9: /* near relative jump */
225 case 0xeb: /* short relative jump */
228 if ((insn
->opcode
.bytes
[1] & 0xf0) == 0x80) /* jcc near */
232 if ((insn
->opcode
.bytes
[0] & 0xf0) == 0x70) /* jcc short */
/* Relative target: address of the following instruction plus the immediate. */
236 target
= (unsigned long)insn
->next_byte
+ insn
->immediate
.value
;
238 return (start
<= target
&& target
<= start
+ len
);
/*
 * Classify @insn as an indirect jump.  The result starts as
 * __insn_is_indirect_jump(@insn); under CONFIG_RETPOLINE a relative jump
 * whose target lies between __indirect_thunk_start and
 * __indirect_thunk_end is checked with insn_jump_into_range() as well.
 *
 * NOTE(review): the condition guarding the second assignment, the #endif
 * and the return statement are not visible in this excerpt.
 */
241 static int insn_is_indirect_jump(struct insn
*insn
)
243 int ret
= __insn_is_indirect_jump(insn
);
245 #ifdef CONFIG_RETPOLINE
247 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
248 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
249 * older gcc may use indirect jump. So we add this check instead of
250 * replace indirect-jump check.
253 ret
= insn_jump_into_range(insn
,
254 (unsigned long)__indirect_thunk_start
,
255 (unsigned long)__indirect_thunk_end
-
256 (unsigned long)__indirect_thunk_start
);
/*
 * @paddr: address of the probe being considered for jump optimization
 *
 * Visible checks, in order:
 *  - the symbol containing @paddr must be found by
 *    kallsyms_lookup_size_offset(), which also yields its size and the
 *    offset of @paddr inside it;
 *  - @paddr is tested against the __entry_text and __irqentry_text ranges;
 *  - size - offset is compared with RELATIVEJUMP_SIZE;
 *  - every instruction from the symbol start (paddr - offset) to its end
 *    is decoded from a recovered copy and tested for exception-table
 *    entries, foreign breakpoints, indirect jumps and jumps into the
 *    bytes following @paddr.
 *
 * NOTE(review): the statements executed when a check fails, the loop
 * increment and the return value are not visible in this excerpt, and the
 * excerpt ends in the middle of the last call.
 */
261 /* Decode whole function to ensure any instructions don't jump into target */
262 static int can_optimize(unsigned long paddr
)
264 unsigned long addr
, size
= 0, offset
= 0;
266 kprobe_opcode_t buf
[MAX_INSN_SIZE
];
268 /* Lookup symbol including addr */
269 if (!kallsyms_lookup_size_offset(paddr
, &size
, &offset
))
273 * Do not optimize in the entry code due to the unstable
274 * stack handling and registers setup.
276 if (((paddr
>= (unsigned long)__entry_text_start
) &&
277 (paddr
< (unsigned long)__entry_text_end
)) ||
278 ((paddr
>= (unsigned long)__irqentry_text_start
) &&
279 (paddr
< (unsigned long)__irqentry_text_end
)))
282 /* Check there is enough space for a relative jump. */
283 if (size
- offset
< RELATIVEJUMP_SIZE
)
286 /* Decode instructions */
287 addr
= paddr
- offset
;
288 while (addr
< paddr
- offset
+ size
) { /* Decode until function end */
289 unsigned long recovered_insn
;
290 if (search_exception_tables(addr
))
292 * Since some fixup code will jumps into this function,
293 * we can't optimize kprobe in this function.
/* Decode from a recovered copy of the instruction rather than from the text at addr. */
296 recovered_insn
= recover_probed_instruction(buf
, addr
);
299 kernel_insn_init(&insn
, (void *)recovered_insn
, MAX_INSN_SIZE
);
300 insn_get_length(&insn
);
301 /* Another subsystem puts a breakpoint */
302 if (insn
.opcode
.bytes
[0] == BREAKPOINT_INSTRUCTION
)
304 /* Recover address */
/* The decoder saw the copy in buf; point kaddr/next_byte back at the real address so relative targets are computed correctly. */
305 insn
.kaddr
= (void *)addr
;
306 insn
.next_byte
= (void *)(addr
+ insn
.length
);
307 /* Check any instructions don't jump into target */
308 if (insn_is_indirect_jump(&insn
) ||
309 insn_jump_into_range(&insn
, paddr
+ INT3_SIZE
,
/*
 * @op: optimized kprobe to check
 *
 * Walks the addresses op->kp.addr + 1 .. op->kp.addr + op->optinsn.size - 1
 * and looks for another kprobe that is registered there and not disabled.
 *
 * NOTE(review): the statement executed when such a probe is found and the
 * final return value are not visible in this excerpt.
 */
318 /* Check optimized_kprobe can actually be optimized. */
319 int arch_check_optimized_kprobe(struct optimized_kprobe
*op
)
324 for (i
= 1; i
< op
->optinsn
.size
; i
++) {
325 p
= get_kprobe(op
->kp
.addr
+ i
);
326 if (p
&& !kprobe_disabled(p
))
/*
 * Returns non-zero when addr satisfies
 *   op->kp.addr <= addr < op->kp.addr + op->optinsn.size.
 *
 * NOTE(review): the declaration of the addr parameter is not visible in
 * this excerpt.
 */
333 /* Check the addr is within the optimized instructions. */
334 int arch_within_optimized_kprobe(struct optimized_kprobe
*op
,
337 return ((unsigned long)op
->kp
.addr
<= addr
&&
338 (unsigned long)op
->kp
.addr
+ op
->optinsn
.size
> addr
);
/*
 * @op:    optimized kprobe whose instruction slot is released
 * @dirty: passed through unchanged to free_optinsn_slot()
 *
 * Does nothing when op->optinsn.insn is NULL.  Otherwise the slot is
 * freed and op->optinsn.insn / op->optinsn.size are reset to NULL / 0, so
 * a second call is harmless.
 */
341 /* Free optimized instruction slot */
343 void __arch_remove_optimized_kprobe(struct optimized_kprobe
*op
, int dirty
)
345 if (op
->optinsn
.insn
) {
346 free_optinsn_slot(op
->optinsn
.insn
, dirty
);
347 op
->optinsn
.insn
= NULL
;
348 op
->optinsn
.size
= 0;
/* Release the instruction slot of @op via __arch_remove_optimized_kprobe() with dirty set to 1. */
352 void arch_remove_optimized_kprobe(struct optimized_kprobe
*op
)
354 __arch_remove_optimized_kprobe(op
, 1);
/*
 * @op:       optimized kprobe to prepare
 * @__unused: not referenced in the visible lines
 *
 * Visible steps:
 *  1. can_optimize() is consulted for op->kp.addr.
 *  2. A zeroed scratch buffer of MAX_OPTINSN_SIZE bytes is allocated and an
 *     instruction slot is taken with get_optinsn_slot().
 *  3. The distance between slot and probe address is checked against
 *     0x7fffffff.
 *  4. The scratch buffer is filled with: the template (TMPL_END_IDX bytes),
 *     the copied original instructions, the pointer to @op at
 *     TMPL_MOVE_IDX, a relative call to optimized_callback at
 *     TMPL_CALL_IDX, and a trailing relative jump back to
 *     op->kp.addr + op->optinsn.size.
 *  5. The finished buffer is written into the slot with text_poke().
 *
 * NOTE(review): error checks, return values, labels and the release of the
 * scratch buffer are not visible in this excerpt.
 */
358 * Copy replacing target instructions
359 * Target instructions MUST be relocatable (checked inside)
360 * This is called when new aggr(opt)probe is allocated or reused.
362 int arch_prepare_optimized_kprobe(struct optimized_kprobe
*op
,
363 struct kprobe
*__unused
)
365 u8
*buf
= NULL
, *slot
;
369 if (!can_optimize((unsigned long)op
->kp
.addr
))
372 buf
= kzalloc(MAX_OPTINSN_SIZE
, GFP_KERNEL
);
376 op
->optinsn
.insn
= slot
= get_optinsn_slot();
383 * Verify if the address gap is in 2GB range, because this uses
/*
 * NOTE(review): this evaluates slot - addr + RELATIVEJUMP_SIZE, whereas
 * arch_optimize_kprobes() encodes the displacement as
 * insn - (addr + RELATIVEJUMP_SIZE).  The two differ by
 * 2 * RELATIVEJUMP_SIZE, so this range check is only approximate at the
 * 2GB boundary - confirm whether that is intended.
 */
386 rel
= (long)slot
- (long)op
->kp
.addr
+ RELATIVEJUMP_SIZE
;
387 if (abs(rel
) > 0x7fffffff) {
392 /* Copy arch-dep-instance from template */
393 memcpy(buf
, optprobe_template_entry
, TMPL_END_IDX
);
395 /* Copy instructions into the out-of-line buffer */
/* dest is the scratch buffer; the third argument is the matching position inside the slot. */
396 ret
= copy_optimized_instructions(buf
+ TMPL_END_IDX
, op
->kp
.addr
,
397 slot
+ TMPL_END_IDX
);
400 op
->optinsn
.size
= ret
;
401 len
= TMPL_END_IDX
+ op
->optinsn
.size
;
403 /* Set probe information */
404 synthesize_set_arg1(buf
+ TMPL_MOVE_IDX
, (unsigned long)op
);
406 /* Set probe function call */
407 synthesize_relcall(buf
+ TMPL_CALL_IDX
,
408 slot
+ TMPL_CALL_IDX
, optimized_callback
);
410 /* Set returning jmp instruction at the tail of out-of-line buffer */
411 synthesize_reljump(buf
+ len
, slot
+ len
,
412 (u8
*)op
->kp
.addr
+ op
->optinsn
.size
);
413 len
+= RELATIVEJUMP_SIZE
;
415 /* We have to use text_poke for instruction buffer because it is RO */
416 text_poke(slot
, buf
, len
);
/* Releases the slot again with dirty set to 0; presumably the error path - the label is not visible here. */
423 __arch_remove_optimized_kprobe(op
, 0);
/*
 * @oplist: list of optimized_kprobe entries (linked through ->list) to be
 *          converted
 *
 * For every entry the visible code computes the 32-bit displacement from
 * the end of the jump (op->kp.addr + RELATIVEJUMP_SIZE) to
 * op->optinsn.insn, warns if the probe is disabled, saves the bytes
 * following the first opcode byte into op->optinsn.copied_insn, builds
 * the jump in insn_buf, patches it in with text_poke_bp() and removes the
 * entry from the list.
 *
 * NOTE(review): the length argument of the backup memcpy() and the last
 * argument of text_poke_bp() are not visible in this excerpt.
 */
428 * Replace breakpoints (int3) with relative jumps.
429 * Caller must call with locking kprobe_mutex and text_mutex.
431 void arch_optimize_kprobes(struct list_head
*oplist
)
433 struct optimized_kprobe
*op
, *tmp
;
434 u8 insn_buf
[RELATIVEJUMP_SIZE
];
436 list_for_each_entry_safe(op
, tmp
, oplist
, list
) {
437 s32 rel
= (s32
)((long)op
->optinsn
.insn
-
438 ((long)op
->kp
.addr
+ RELATIVEJUMP_SIZE
));
440 WARN_ON(kprobe_disabled(&op
->kp
));
442 /* Backup instructions which will be replaced by jump address */
443 memcpy(op
->optinsn
.copied_insn
, op
->kp
.addr
+ INT3_SIZE
,
/* Jump encoding: one opcode byte followed by the 32-bit displacement. */
446 insn_buf
[0] = RELATIVEJUMP_OPCODE
;
447 *(s32
*)(&insn_buf
[1]) = rel
;
449 text_poke_bp(op
->kp
.addr
, insn_buf
, RELATIVEJUMP_SIZE
,
452 list_del_init(&op
->list
);
/*
 * @op: optimized kprobe to revert
 *
 * Builds RELATIVEJUMP_SIZE bytes consisting of BREAKPOINT_INSTRUCTION
 * followed by the RELATIVE_ADDR_SIZE bytes saved in
 * op->optinsn.copied_insn, and patches them over op->kp.addr with
 * text_poke_bp().
 *
 * NOTE(review): the last argument of text_poke_bp() is not visible in this
 * excerpt.
 */
456 /* Replace a relative jump with a breakpoint (int3). */
457 void arch_unoptimize_kprobe(struct optimized_kprobe
*op
)
459 u8 insn_buf
[RELATIVEJUMP_SIZE
];
461 /* Set int3 to first byte for kprobes */
462 insn_buf
[0] = BREAKPOINT_INSTRUCTION
;
463 memcpy(insn_buf
+ 1, op
->optinsn
.copied_insn
, RELATIVE_ADDR_SIZE
);
464 text_poke_bp(op
->kp
.addr
, insn_buf
, RELATIVEJUMP_SIZE
,
/*
 * @oplist:    list of optimized_kprobe entries (linked through ->list) to
 *             revert
 * @done_list: list that receives each entry after it has been reverted
 *
 * Calls arch_unoptimize_kprobe() on every entry of @oplist and moves the
 * entry to @done_list; the _safe iterator is used because entries leave
 * @oplist during the walk.
 */
469 * Recover original instructions and breakpoints from relative jumps.
470 * Caller must call with locking kprobe_mutex.
472 extern void arch_unoptimize_kprobes(struct list_head
*oplist
,
473 struct list_head
*done_list
)
475 struct optimized_kprobe
*op
, *tmp
;
477 list_for_each_entry_safe(op
, tmp
, oplist
, list
) {
478 arch_unoptimize_kprobe(op
);
479 list_move(&op
->list
, done_list
);
/*
 * @p:       kprobe that was hit
 * @regs:    register snapshot whose ip is redirected
 * @reenter: not referenced in the visible lines
 *
 * When @p has KPROBE_FLAG_OPTIMIZED set, regs->ip is pointed at
 * op->optinsn.insn + TMPL_END_IDX, i.e. just past the template part of the
 * instruction slot, and the visible code then calls
 * reset_current_kprobe() and preempt_enable_no_resched().
 *
 * NOTE(review): any condition around reset_current_kprobe() and the return
 * values are not visible in this excerpt.
 */
483 int setup_detour_execution(struct kprobe
*p
, struct pt_regs
*regs
, int reenter
)
485 struct optimized_kprobe
*op
;
487 if (p
->flags
& KPROBE_FLAG_OPTIMIZED
) {
488 /* This kprobe is really able to run optimized path. */
489 op
= container_of(p
, struct optimized_kprobe
, kp
);
490 /* Detour through copied instructions */
491 regs
->ip
= (unsigned long)op
->optinsn
.insn
+ TMPL_END_IDX
;
493 reset_current_kprobe();
494 preempt_enable_no_resched();
499 NOKPROBE_SYMBOL(setup_detour_execution
);