/*
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"
#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif
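/*
 * kmemcheck_enabled doubles as the runtime mode: 0 means disabled, 1 means
 * enabled, and 2 means one-shot mode, in which kmemcheck disables itself
 * again after the first report (see the kmemcheck_error_save() callers
 * below).
 */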
int kmemcheck_enabled = KMEMCHECK_ENABLED;
int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
        /*
         * Limit SMP to use a single CPU. We rely on the fact that this code
         * runs before SMP is set up.
         */
        if (setup_max_cpus > 1) {
                printk(KERN_INFO
                        "kmemcheck: Limiting number of CPUs to 1.\n");
                setup_max_cpus = 1;
        }
#endif

        if (!kmemcheck_selftest()) {
                printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
                kmemcheck_enabled = 0;
                return -EINVAL;
        }

        printk(KERN_INFO "kmemcheck: Initialized\n");
        return 0;
}

early_initcall(kmemcheck_init);
/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
        int val;
        int ret;

        if (!str)
                return -EINVAL;

        ret = kstrtoint(str, 0, &val);
        if (ret)
                return ret;
        kmemcheck_enabled = val;
        return 0;
}

early_param("kmemcheck", param_kmemcheck);
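/*
 * kmemcheck_show_addr() and kmemcheck_hide_addr() toggle the _PAGE_PRESENT
 * bit of a single tracked page, so that the faulting instruction can be
 * single-stepped with the page temporarily mapped in. Both return nonzero
 * only if the address really is tracked by kmemcheck.
 */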
int kmemcheck_show_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}
int kmemcheck_hide_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}
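/*
 * Per-CPU state shared between the page fault handler, which reveals the
 * tracked page(s) and requests a single step, and the debug exception
 * handler, which hides them again afterwards.
 */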
struct kmemcheck_context {
        bool busy;
        int balance;

        /*
         * There can be at most two memory operands to an instruction, but
         * each address can cross a page boundary -- so we may need up to
         * four addresses that must be hidden/revealed for each fault.
         */
        unsigned long addr[4];
        unsigned long n_addrs;
        unsigned long flags;

        /* Data size of the instruction that caused a fault. */
        unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
bool kmemcheck_active(struct pt_regs *regs)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

        return data->balance > 0;
}
/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

        BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
        data->addr[data->n_addrs++] = addr;
}
static unsigned int kmemcheck_show_all(void)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_show_addr(data->addr[i]);

        return n;
}
static unsigned int kmemcheck_hide_all(void)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_hide_addr(data->addr[i]);

        return n;
}
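/*
 * kmemcheck_show() below maps the saved addresses back in and sets the TF
 * flag, so that the CPU traps into kmemcheck_hide() right after the
 * faulting instruction has been executed once.
 */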
/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 0)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->balance = 0;
                return;
        }

        /*
         * None of the addresses actually belonged to kmemcheck. Note that
         * this is not an error.
         */
        if (kmemcheck_show_all() == 0)
                return;

        ++data->balance;

        /*
         * The IF needs to be cleared as well, so that the faulting
         * instruction can run "uninterrupted". Otherwise, we might take
         * an interrupt and start executing that before we've had a chance
         * to hide the page again.
         *
         * NOTE: In the rare case of multiple faults, we must not override
         * the original flags:
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                data->flags = regs->flags;

        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;
}
/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
        int n;

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 1)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->n_addrs = 0;
                data->balance = 0;

                if (!(data->flags & X86_EFLAGS_TF))
                        regs->flags &= ~X86_EFLAGS_TF;
                if (data->flags & X86_EFLAGS_IF)
                        regs->flags |= X86_EFLAGS_IF;
                return;
        }

        if (kmemcheck_enabled)
                n = kmemcheck_hide_all();
        else
                n = kmemcheck_show_all();

        if (n == 0)
                return;

        --data->balance;

        data->n_addrs = 0;

        if (!(data->flags & X86_EFLAGS_TF))
                regs->flags &= ~X86_EFLAGS_TF;
        if (data->flags & X86_EFLAGS_IF)
                regs->flags |= X86_EFLAGS_IF;
}
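/*
 * kmemcheck_show_pages() and kmemcheck_hide_pages() switch whole page
 * ranges between the normal state and the tracked-but-hidden state. Hidden
 * pages are non-present, so every access to them faults into
 * kmemcheck_fault().
 */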
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}
bool kmemcheck_page_is_tracked(struct page *p)
{
        /* This will also check the "hidden" flag of the PTE. */
        return kmemcheck_pte_lookup((unsigned long) page_address(p));
}
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}
/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;
        enum kmemcheck_shadow status;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;

        /* Don't warn about it again. */
        kmemcheck_shadow_set(shadow, size);
}
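/*
 * Helper for other debugging code to ask whether a whole object is
 * initialized according to its shadow bytes. Memory that isn't tracked by
 * kmemcheck is reported as initialized.
 */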
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
        enum kmemcheck_shadow status;
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return true;

        status = kmemcheck_shadow_test_all(shadow, size);

        return status == KMEMCHECK_SHADOW_INITIALIZED;
}
/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_read_strict(regs, addr, size);
                return;
        }

        /*
         * What we do is basically to split the access across the
         * two pages and handle each part separately. Yes, this means
         * that we may now see reads that are 3 + 5 bytes, for
         * example (and if both are uninitialized, there will be two
         * reports), but it makes the code a lot simpler.
         */
        kmemcheck_read_strict(regs, addr, next_page - addr);
        kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}
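/*
 * Writes are never reported: they simply mark the destination shadow as
 * initialized. As above, the "strict" variant must not cross a page
 * boundary.
 */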
static void kmemcheck_write_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        kmemcheck_shadow_set(shadow, size);
}
static void kmemcheck_write(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_write_strict(regs, addr, size);
                return;
        }

        /* See comment in kmemcheck_read(). */
        kmemcheck_write_strict(regs, addr, next_page - addr);
        kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}
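/*
 * Note the ordering in kmemcheck_copy() below: the source shadow is first
 * propagated to the destination shadow (uninitializedness follows the
 * data), and a warning is only generated for uninitialized bytes that could
 * not be propagated to a tracked destination.
 */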
/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
        unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
        uint8_t shadow[8];
        enum kmemcheck_shadow status;

        unsigned long page;
        unsigned long next_addr;
        unsigned long next_page;

        uint8_t *x;
        unsigned int i;
        unsigned int n;

        BUG_ON(size > sizeof(shadow));

        page = src_addr & PAGE_MASK;
        next_addr = src_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < size; ++i)
                                shadow[i] = x[i];
                } else {
                        for (i = 0; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        } else {
                n = next_page - src_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < n; ++i)
                                shadow[i] = x[i];
                } else {
                        /* Not tracked */
                        for (i = 0; i < n; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i)
                                shadow[i] = x[i - n];
                } else {
                        /* Not tracked */
                        for (i = n; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        }

        page = dst_addr & PAGE_MASK;
        next_addr = dst_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < size; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        } else {
                n = next_page - dst_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < n; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i) {
                                x[i - n] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        }

        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, src_addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;
}
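/*
 * Fallback access type as reported by the page fault error code; used when
 * the faulting opcode needs no special handling.
 */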
enum kmemcheck_method {
        KMEMCHECK_READ,
        KMEMCHECK_WRITE,
};
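/*
 * Decode the instruction at regs->ip and dispatch to the read/write/copy
 * handlers; the fallback address and method supplied by the page fault
 * handler are used for ordinary opcodes.
 */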
static void kmemcheck_access(struct pt_regs *regs,
        unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
        const uint8_t *insn;
        const uint8_t *insn_primary;
        unsigned int size;

        struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

        /* Recursive fault -- ouch. */
        if (data->busy) {
                kmemcheck_show_addr(fallback_address);
                kmemcheck_error_save_bug(regs);
                return;
        }

        data->busy = true;

        insn = (const uint8_t *) regs->ip;
        insn_primary = kmemcheck_opcode_get_primary(insn);

        kmemcheck_opcode_decode(insn, &size);

        switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
                /* AND, OR, XOR */
                /*
                 * Unfortunately, these instructions have to be excluded from
                 * our regular checking since they access only some (and not
                 * all) bits. This clears out "bogus" bitfield-access warnings.
                 */
        case 0x80:
        case 0x81:
        case 0x82:
        case 0x83:
                switch ((insn_primary[1] >> 3) & 7) {
                        /* OR */
                case 1:
                        /* AND */
                case 4:
                        /* XOR */
                case 6:
                        kmemcheck_write(regs, fallback_address, size);
                        goto out;

                        /* ADD */
                case 0:
                        /* ADC */
                case 2:
                        /* SBB */
                case 3:
                        /* SUB */
                case 5:
                        /* CMP */
                case 7:
                        break;
                }
                break;
#endif

                /* MOVS, MOVSB, MOVSW, MOVSD */
        case 0xa4:
        case 0xa5:
                /*
                 * These instructions are special because they take two
                 * addresses, but we only get one page fault.
                 */
                kmemcheck_copy(regs, regs->si, regs->di, size);
                goto out;

                /* CMPS, CMPSB, CMPSW, CMPSD */
        case 0xa6:
        case 0xa7:
                kmemcheck_read(regs, regs->si, size);
                kmemcheck_read(regs, regs->di, size);
                goto out;
        }

        /*
         * If the opcode isn't special in any way, we use the data from the
         * page fault handler to determine the address and type of memory
         * access.
         */
        switch (fallback_method) {
        case KMEMCHECK_READ:
                kmemcheck_read(regs, fallback_address, size);
                goto out;
        case KMEMCHECK_WRITE:
                kmemcheck_write(regs, fallback_address, size);
                goto out;
        }

out:
        data->busy = false;
}
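/*
 * Entry point from the page fault handler. Returns true if the fault hit a
 * page tracked by kmemcheck and has been handled here.
 */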
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
        unsigned long error_code)
{
        pte_t *pte;

        /*
         * XXX: Is it safe to assume that memory accesses from virtual 86
         * mode or non-kernel code segments will _never_ access kernel
         * memory (e.g. tracked pages)? For now, we need this to avoid
         * invoking kmemcheck for PnP BIOS calls.
         */
        if (regs->flags & X86_VM_MASK)
                return false;
        if (regs->cs != __KERNEL_CS)
                return false;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return false;

        WARN_ON_ONCE(in_nmi());

        if (error_code & 2)
                kmemcheck_access(regs, address, KMEMCHECK_WRITE);
        else
                kmemcheck_access(regs, address, KMEMCHECK_READ);

        kmemcheck_show(regs);
        return true;
}
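/*
 * Entry point from the debug exception handler: once the single-stepped
 * instruction has completed, hide the page(s) again.
 */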
bool kmemcheck_trap(struct pt_regs *regs)
{
        if (!kmemcheck_active(regs))
                return false;

        /* We're done. */
        kmemcheck_hide(regs);
        return true;
}