2 * linux/kernel/power/snapshot.c
2 * This file provides system snapshot/restore functionality.
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
8 * This file is released under the GPLv2, and is based on swsusp.c.
13 #include <linux/version.h>
14 #include <linux/module.h>
16 #include <linux/suspend.h>
17 #include <linux/smp_lock.h>
18 #include <linux/delay.h>
19 #include <linux/bitops.h>
20 #include <linux/spinlock.h>
21 #include <linux/kernel.h>
23 #include <linux/device.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
29 #include <asm/uaccess.h>
30 #include <asm/mmu_context.h>
31 #include <asm/pgtable.h>
32 #include <asm/tlbflush.h>
37 struct pbe
*pagedir_nosave
;
38 static unsigned int nr_copy_pages
;
39 static unsigned int nr_meta_pages
;
40 static unsigned long *buffer
;
42 struct arch_saveable_page
{
46 struct arch_saveable_page
*next
;
48 static struct arch_saveable_page
*arch_pages
;
50 int swsusp_add_arch_pages(unsigned long start
, unsigned long end
)
52 struct arch_saveable_page
*tmp
;
55 tmp
= kzalloc(sizeof(struct arch_saveable_page
), GFP_KERNEL
);
59 tmp
->end
= ((start
>> PAGE_SHIFT
) + 1) << PAGE_SHIFT
;
62 tmp
->next
= arch_pages
;
69 static unsigned int count_arch_pages(void)
71 unsigned int count
= 0;
72 struct arch_saveable_page
*tmp
= arch_pages
;
80 static int save_arch_mem(void)
83 struct arch_saveable_page
*tmp
= arch_pages
;
86 pr_debug("swsusp: Saving arch specific memory");
88 tmp
->data
= (char *)__get_free_page(GFP_ATOMIC
);
91 offset
= tmp
->start
- (tmp
->start
& PAGE_MASK
);
92 /* arch pages might not have a 'struct page' */
93 kaddr
= kmap_atomic_pfn(tmp
->start
>> PAGE_SHIFT
, KM_USER0
);
94 memcpy(tmp
->data
+ offset
, kaddr
+ offset
,
95 tmp
->end
- tmp
->start
);
96 kunmap_atomic(kaddr
, KM_USER0
);
103 static int restore_arch_mem(void)
106 struct arch_saveable_page
*tmp
= arch_pages
;
112 offset
= tmp
->start
- (tmp
->start
& PAGE_MASK
);
113 kaddr
= kmap_atomic_pfn(tmp
->start
>> PAGE_SHIFT
, KM_USER0
);
114 memcpy(kaddr
+ offset
, tmp
->data
+ offset
,
115 tmp
->end
- tmp
->start
);
116 kunmap_atomic(kaddr
, KM_USER0
);
117 free_page((long)tmp
->data
);
124 #ifdef CONFIG_HIGHMEM
125 unsigned int count_highmem_pages(void)
128 unsigned long zone_pfn
;
132 if (is_highmem(zone
)) {
133 mark_free_pages(zone
);
134 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; zone_pfn
++) {
136 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
139 page
= pfn_to_page(pfn
);
140 if (PageReserved(page
))
142 if (PageNosaveFree(page
))
150 struct highmem_page
{
153 struct highmem_page
*next
;
156 static struct highmem_page
*highmem_copy
;
158 static int save_highmem_zone(struct zone
*zone
)
160 unsigned long zone_pfn
;
161 mark_free_pages(zone
);
162 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
164 struct highmem_page
*save
;
166 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
172 page
= pfn_to_page(pfn
);
174 * This condition results from rvmalloc() sans vmalloc_32()
175 * and architectural memory reservations. This should be
176 * corrected eventually when the cases giving rise to this
177 * are better understood.
179 if (PageReserved(page
))
181 BUG_ON(PageNosave(page
));
182 if (PageNosaveFree(page
))
184 save
= kmalloc(sizeof(struct highmem_page
), GFP_ATOMIC
);
187 save
->next
= highmem_copy
;
189 save
->data
= (void *) get_zeroed_page(GFP_ATOMIC
);
194 kaddr
= kmap_atomic(page
, KM_USER0
);
195 memcpy(save
->data
, kaddr
, PAGE_SIZE
);
196 kunmap_atomic(kaddr
, KM_USER0
);
202 int save_highmem(void)
207 pr_debug("swsusp: Saving Highmem");
209 for_each_zone (zone
) {
210 if (is_highmem(zone
))
211 res
= save_highmem_zone(zone
);
219 int restore_highmem(void)
221 printk("swsusp: Restoring Highmem\n");
222 while (highmem_copy
) {
223 struct highmem_page
*save
= highmem_copy
;
225 highmem_copy
= save
->next
;
227 kaddr
= kmap_atomic(save
->page
, KM_USER0
);
228 memcpy(kaddr
, save
->data
, PAGE_SIZE
);
229 kunmap_atomic(kaddr
, KM_USER0
);
230 free_page((long) save
->data
);
/*
 * Stub versions of the highmem helpers: each takes no arguments, does
 * nothing and returns 0.
 * NOTE(review): presumably these form the !CONFIG_HIGHMEM branch of the
 * #ifdef CONFIG_HIGHMEM section above; the #else/#endif lines are not
 * visible here - confirm.
 */
236 static unsigned int count_highmem_pages(void) {return 0;}
237 static int save_highmem(void) {return 0;}
238 static int restore_highmem(void) {return 0;}
/*
 * count_special_pages - return the sum of count_arch_pages() and
 * count_highmem_pages().
 */
241 unsigned int count_special_pages(void)
243 return count_arch_pages() + count_highmem_pages();
246 int save_special_mem(void)
249 ret
= save_arch_mem();
251 ret
= save_highmem();
255 int restore_special_mem(void)
258 ret
= restore_arch_mem();
260 ret
= restore_highmem();
/*
 * pfn_is_nosave - test whether a page frame number lies in the nosave
 * region.
 *
 * The region's bounds are derived from the physical addresses of
 * __nosave_begin and __nosave_end, converted to page frame numbers; the
 * end address is rounded up with PAGE_ALIGN() first, so a partially used
 * last page is included.
 *
 * Returns nonzero when nosave_begin_pfn <= @pfn < nosave_end_pfn, and
 * zero otherwise.
 */
264 static int pfn_is_nosave(unsigned long pfn
)
266 unsigned long nosave_begin_pfn
= __pa(&__nosave_begin
) >> PAGE_SHIFT
;
267 unsigned long nosave_end_pfn
= PAGE_ALIGN(__pa(&__nosave_end
)) >> PAGE_SHIFT
;
268 return (pfn
>= nosave_begin_pfn
) && (pfn
< nosave_end_pfn
);
272 * saveable - Determine whether a page should be cloned or not.
275 * We save a page if it's Reserved, and not in the range of pages
276 * statically defined as 'unsaveable', or if it isn't reserved, and
277 * isn't part of a free chunk of pages.
280 static int saveable(struct zone
*zone
, unsigned long *zone_pfn
)
282 unsigned long pfn
= *zone_pfn
+ zone
->zone_start_pfn
;
288 page
= pfn_to_page(pfn
);
289 if (PageNosave(page
))
291 if (PageReserved(page
) && pfn_is_nosave(pfn
))
293 if (PageNosaveFree(page
))
299 unsigned int count_data_pages(void)
302 unsigned long zone_pfn
;
305 for_each_zone (zone
) {
306 if (is_highmem(zone
))
308 mark_free_pages(zone
);
309 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
310 n
+= saveable(zone
, &zone_pfn
);
315 static void copy_data_pages(struct pbe
*pblist
)
318 unsigned long zone_pfn
;
322 for_each_zone (zone
) {
323 if (is_highmem(zone
))
325 mark_free_pages(zone
);
326 /* This is necessary for swsusp_free() */
327 for_each_pb_page (p
, pblist
)
328 SetPageNosaveFree(virt_to_page(p
));
329 for_each_pbe (p
, pblist
)
330 SetPageNosaveFree(virt_to_page(p
->address
));
331 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
332 if (saveable(zone
, &zone_pfn
)) {
334 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
336 pbe
->orig_address
= (unsigned long)page_address(page
);
337 /* copy_page is not usable for copying task structs. */
338 memcpy((void *)pbe
->address
, (void *)pbe
->orig_address
, PAGE_SIZE
);
348 * free_pagedir - free pages allocated with alloc_pagedir()
351 static void free_pagedir(struct pbe
*pblist
, int clear_nosave_free
)
356 pbe
= (pblist
+ PB_PAGE_SKIP
)->next
;
357 ClearPageNosave(virt_to_page(pblist
));
358 if (clear_nosave_free
)
359 ClearPageNosaveFree(virt_to_page(pblist
));
360 free_page((unsigned long)pblist
);
366 * fill_pb_page - Create a list of PBEs on a given memory page
369 static inline void fill_pb_page(struct pbe
*pbpage
)
374 pbpage
+= PB_PAGE_SKIP
;
377 while (++p
< pbpage
);
381 * create_pbe_list - Create a list of PBEs on top of a given chain
382 * of memory pages allocated with alloc_pagedir()
385 static inline void create_pbe_list(struct pbe
*pblist
, unsigned int nr_pages
)
387 struct pbe
*pbpage
, *p
;
388 unsigned int num
= PBES_PER_PAGE
;
390 for_each_pb_page (pbpage
, pblist
) {
394 fill_pb_page(pbpage
);
395 num
+= PBES_PER_PAGE
;
398 for (num
-= PBES_PER_PAGE
- 1, p
= pbpage
; num
< nr_pages
; p
++, num
++)
405 * On resume it is necessary to trace and eventually free the unsafe
406 * pages that have been allocated, because they are needed for I/O
407 * (on x86-64 we likely will "eat" these pages once again while
408 * creating the temporary page translation tables)
412 struct eaten_page
*next
;
413 char padding
[PAGE_SIZE
- sizeof(void *)];
416 static struct eaten_page
*eaten_pages
= NULL
;
418 static void release_eaten_pages(void)
420 struct eaten_page
*p
, *q
;
425 /* We don't want swsusp_free() to free this page again */
426 ClearPageNosave(virt_to_page(p
));
427 free_page((unsigned long)p
);
434 * @safe_needed - on resume, for storing the PBE list and the image,
435 * we can only use memory pages that do not conflict with the pages
436 * which had been used before suspend.
438 * The unsafe pages are marked with the PG_nosave_free flag
440 * Allocated but unusable (ie eaten) memory pages should be marked
441 * so that swsusp_free() can release them
444 static inline void *alloc_image_page(gfp_t gfp_mask
, int safe_needed
)
450 res
= (void *)get_zeroed_page(gfp_mask
);
451 if (res
&& PageNosaveFree(virt_to_page(res
))) {
452 /* This is for swsusp_free() */
453 SetPageNosave(virt_to_page(res
));
454 ((struct eaten_page
*)res
)->next
= eaten_pages
;
457 } while (res
&& PageNosaveFree(virt_to_page(res
)));
459 res
= (void *)get_zeroed_page(gfp_mask
);
461 SetPageNosave(virt_to_page(res
));
462 SetPageNosaveFree(virt_to_page(res
));
/*
 * get_safe_page - wrapper around alloc_image_page() that always passes
 * safe_needed = 1 and returns the resulting pointer as an unsigned long.
 *
 * @gfp_mask is forwarded unchanged to alloc_image_page().
 */
467 unsigned long get_safe_page(gfp_t gfp_mask
)
469 return (unsigned long)alloc_image_page(gfp_mask
, 1);
473 * alloc_pagedir - Allocate the page directory.
475 * First, determine exactly how many pages we need and
478 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
479 * struct pbe elements (pbes) and the last element in the page points
482 * On each page we set up a list of struct pbe elements.
485 struct pbe
*alloc_pagedir(unsigned int nr_pages
, gfp_t gfp_mask
, int safe_needed
)
488 struct pbe
*pblist
, *pbe
;
493 pblist
= alloc_image_page(gfp_mask
, safe_needed
);
494 /* FIXME: rewrite this ugly loop */
495 for (pbe
= pblist
, num
= PBES_PER_PAGE
; pbe
&& num
< nr_pages
;
496 pbe
= pbe
->next
, num
+= PBES_PER_PAGE
) {
498 pbe
->next
= alloc_image_page(gfp_mask
, safe_needed
);
500 if (!pbe
) { /* get_zeroed_page() failed */
501 free_pagedir(pblist
, 1);
504 create_pbe_list(pblist
, nr_pages
);
509 * Free pages we allocated for suspend. Suspend pages are allocated
510 * before atomic copy, so we need to free them after resume.
513 void swsusp_free(void)
516 unsigned long zone_pfn
;
518 for_each_zone(zone
) {
519 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
520 if (pfn_valid(zone_pfn
+ zone
->zone_start_pfn
)) {
522 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
523 if (PageNosave(page
) && PageNosaveFree(page
)) {
524 ClearPageNosave(page
);
525 ClearPageNosaveFree(page
);
526 free_page((long) page_address(page
));
532 pagedir_nosave
= NULL
;
538 * enough_free_mem - Make sure we have enough free memory to snapshot.
540 * Returns TRUE or FALSE after checking the number of available
544 static int enough_free_mem(unsigned int nr_pages
)
550 if (!is_highmem(zone
))
551 n
+= zone
->free_pages
;
552 pr_debug("swsusp: available memory: %u pages\n", n
);
553 return n
> (nr_pages
+ PAGES_FOR_IO
+
554 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
);
557 static int alloc_data_pages(struct pbe
*pblist
, gfp_t gfp_mask
, int safe_needed
)
561 for_each_pbe (p
, pblist
) {
562 p
->address
= (unsigned long)alloc_image_page(gfp_mask
, safe_needed
);
569 static struct pbe
*swsusp_alloc(unsigned int nr_pages
)
573 if (!(pblist
= alloc_pagedir(nr_pages
, GFP_ATOMIC
| __GFP_COLD
, 0))) {
574 printk(KERN_ERR
"suspend: Allocating pagedir failed.\n");
578 if (alloc_data_pages(pblist
, GFP_ATOMIC
| __GFP_COLD
, 0)) {
579 printk(KERN_ERR
"suspend: Allocating image pages failed.\n");
587 asmlinkage
int swsusp_save(void)
589 unsigned int nr_pages
;
591 pr_debug("swsusp: critical section: \n");
594 nr_pages
= count_data_pages();
595 printk("swsusp: Need to copy %u pages\n", nr_pages
);
597 pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
599 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
,
600 PAGES_FOR_IO
, nr_free_pages());
602 if (!enough_free_mem(nr_pages
)) {
603 printk(KERN_ERR
"swsusp: Not enough free memory\n");
607 pagedir_nosave
= swsusp_alloc(nr_pages
);
611 /* While the suspend pagedir is being allocated, new cold pages may appear.
615 copy_data_pages(pagedir_nosave
);
618 * End of critical section. From now on, we can write to memory,
619 * but we should not touch disk. This especially means we must _not_
620 * touch swap space! Except we must write out our image of course.
623 nr_copy_pages
= nr_pages
;
624 nr_meta_pages
= (nr_pages
* sizeof(long) + PAGE_SIZE
- 1) >> PAGE_SHIFT
;
626 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages
);
/*
 * init_header - fill in the swsusp_info structure pointed to by @info.
 *
 * The structure is zeroed first and then populated with:
 *   version_code  - LINUX_VERSION_CODE
 *   num_physpages - the global num_physpages
 *   uts           - a byte copy of system_utsname
 *   cpus          - num_online_cpus()
 *   image_pages   - nr_copy_pages
 *   pages         - nr_copy_pages + nr_meta_pages + 1
 *   size          - pages shifted left by PAGE_SHIFT (i.e. in bytes)
 *
 * NOTE(review): the "+ 1" in pages presumably accounts for the header
 * page itself - confirm against the image reader.
 */
630 static void init_header(struct swsusp_info
*info
)
632 memset(info
, 0, sizeof(struct swsusp_info
));
633 info
->version_code
= LINUX_VERSION_CODE
;
634 info
->num_physpages
= num_physpages
;
635 memcpy(&info
->uts
, &system_utsname
, sizeof(system_utsname
));
636 info
->cpus
= num_online_cpus();
637 info
->image_pages
= nr_copy_pages
;
638 info
->pages
= nr_copy_pages
+ nr_meta_pages
+ 1;
639 info
->size
= info
->pages
;
640 info
->size
<<= PAGE_SHIFT
;
644 * pack_orig_addresses - the .orig_address fields of the PBEs from the
645 * list starting at @pbe are stored in the array @buf[] (1 page)
648 static inline struct pbe
*pack_orig_addresses(unsigned long *buf
, struct pbe
*pbe
)
652 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
653 buf
[j
] = pbe
->orig_address
;
657 for (; j
< PAGE_SIZE
/ sizeof(long); j
++)
663 * snapshot_read_next - used for reading the system memory snapshot.
665 * On the first call to it @handle should point to a zeroed
666 * snapshot_handle structure. The structure gets updated and a pointer
667 * to it should be passed to this function every next time.
669 * The @count parameter should contain the number of bytes the caller
670 * wants to read from the snapshot. It must not be zero.
672 * On success the function returns a positive number. Then, the caller
673 * is allowed to read up to the returned number of bytes from the memory
674 * location computed by the data_of() macro. The number returned
675 * may be smaller than @count, but this only happens if the read would
676 * cross a page boundary otherwise.
678 * The function returns 0 to indicate the end of data stream condition,
679 * and a negative number is returned on error. In such cases the
680 * structure pointed to by @handle is not updated and should not be used
684 int snapshot_read_next(struct snapshot_handle
*handle
, size_t count
)
686 if (handle
->page
> nr_meta_pages
+ nr_copy_pages
)
689 /* This makes the buffer be freed by swsusp_free() */
690 buffer
= alloc_image_page(GFP_ATOMIC
, 0);
694 if (!handle
->offset
) {
695 init_header((struct swsusp_info
*)buffer
);
696 handle
->buffer
= buffer
;
697 handle
->pbe
= pagedir_nosave
;
699 if (handle
->prev
< handle
->page
) {
700 if (handle
->page
<= nr_meta_pages
) {
701 handle
->pbe
= pack_orig_addresses(buffer
, handle
->pbe
);
703 handle
->pbe
= pagedir_nosave
;
705 handle
->buffer
= (void *)handle
->pbe
->address
;
706 handle
->pbe
= handle
->pbe
->next
;
708 handle
->prev
= handle
->page
;
710 handle
->buf_offset
= handle
->page_offset
;
711 if (handle
->page_offset
+ count
>= PAGE_SIZE
) {
712 count
= PAGE_SIZE
- handle
->page_offset
;
713 handle
->page_offset
= 0;
716 handle
->page_offset
+= count
;
718 handle
->offset
+= count
;
723 * mark_unsafe_pages - mark the pages that cannot be used for storing
724 * the image during resume, because they conflict with the pages that
725 * had been used before suspend
728 static int mark_unsafe_pages(struct pbe
*pblist
)
731 unsigned long zone_pfn
;
734 if (!pblist
) /* a sanity check */
737 /* Clear page flags */
738 for_each_zone (zone
) {
739 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
740 if (pfn_valid(zone_pfn
+ zone
->zone_start_pfn
))
741 ClearPageNosaveFree(pfn_to_page(zone_pfn
+
742 zone
->zone_start_pfn
));
745 /* Mark orig addresses */
746 for_each_pbe (p
, pblist
) {
747 if (virt_addr_valid(p
->orig_address
))
748 SetPageNosaveFree(virt_to_page(p
->orig_address
));
756 static void copy_page_backup_list(struct pbe
*dst
, struct pbe
*src
)
758 /* We assume both lists contain the same number of elements */
760 dst
->orig_address
= src
->orig_address
;
766 static int check_header(struct swsusp_info
*info
)
770 if (info
->version_code
!= LINUX_VERSION_CODE
)
771 reason
= "kernel version";
772 if (info
->num_physpages
!= num_physpages
)
773 reason
= "memory size";
774 if (strcmp(info
->uts
.sysname
,system_utsname
.sysname
))
775 reason
= "system type";
776 if (strcmp(info
->uts
.release
,system_utsname
.release
))
777 reason
= "kernel release";
778 if (strcmp(info
->uts
.version
,system_utsname
.version
))
780 if (strcmp(info
->uts
.machine
,system_utsname
.machine
))
783 printk(KERN_ERR
"swsusp: Resume mismatch: %s\n", reason
);
790 * load header - check the image header and copy data from it
793 static int load_header(struct snapshot_handle
*handle
,
794 struct swsusp_info
*info
)
799 error
= check_header(info
);
801 pblist
= alloc_pagedir(info
->image_pages
, GFP_ATOMIC
, 0);
804 pagedir_nosave
= pblist
;
805 handle
->pbe
= pblist
;
806 nr_copy_pages
= info
->image_pages
;
807 nr_meta_pages
= info
->pages
- info
->image_pages
- 1;
813 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
814 * the PBEs in the list starting at @pbe
817 static inline struct pbe
*unpack_orig_addresses(unsigned long *buf
,
822 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
823 pbe
->orig_address
= buf
[j
];
830 * create_image - use metadata contained in the PBE list
831 * pointed to by pagedir_nosave to mark the pages that will
832 * be overwritten in the process of restoring the system
833 * memory state from the image and allocate memory for
834 * the image avoiding these pages
837 static int create_image(struct snapshot_handle
*handle
)
840 struct pbe
*p
, *pblist
;
843 error
= mark_unsafe_pages(p
);
845 pblist
= alloc_pagedir(nr_copy_pages
, GFP_ATOMIC
, 1);
847 copy_page_backup_list(pblist
, p
);
853 error
= alloc_data_pages(pblist
, GFP_ATOMIC
, 1);
855 release_eaten_pages();
856 pagedir_nosave
= pblist
;
858 pagedir_nosave
= NULL
;
867 * snapshot_write_next - used for writing the system memory snapshot.
869 * On the first call to it @handle should point to a zeroed
870 * snapshot_handle structure. The structure gets updated and a pointer
871 * to it should be passed to this function every next time.
873 * The @count parameter should contain the number of bytes the caller
874 * wants to write to the image. It must not be zero.
876 * On success the function returns a positive number. Then, the caller
877 * is allowed to write up to the returned number of bytes to the memory
878 * location computed by the data_of() macro. The number returned
879 * may be smaller than @count, but this only happens if the write would
880 * cross a page boundary otherwise.
882 * The function returns 0 to indicate the "end of file" condition,
883 * and a negative number is returned on error. In such cases the
884 * structure pointed to by @handle is not updated and should not be used
888 int snapshot_write_next(struct snapshot_handle
*handle
, size_t count
)
892 if (handle
->prev
&& handle
->page
> nr_meta_pages
+ nr_copy_pages
)
895 /* This makes the buffer be freed by swsusp_free() */
896 buffer
= alloc_image_page(GFP_ATOMIC
, 0);
901 handle
->buffer
= buffer
;
902 if (handle
->prev
< handle
->page
) {
904 error
= load_header(handle
, (struct swsusp_info
*)buffer
);
907 } else if (handle
->prev
<= nr_meta_pages
) {
908 handle
->pbe
= unpack_orig_addresses(buffer
, handle
->pbe
);
910 error
= create_image(handle
);
913 handle
->pbe
= pagedir_nosave
;
914 handle
->buffer
= (void *)handle
->pbe
->address
;
917 handle
->pbe
= handle
->pbe
->next
;
918 handle
->buffer
= (void *)handle
->pbe
->address
;
920 handle
->prev
= handle
->page
;
922 handle
->buf_offset
= handle
->page_offset
;
923 if (handle
->page_offset
+ count
>= PAGE_SIZE
) {
924 count
= PAGE_SIZE
- handle
->page_offset
;
925 handle
->page_offset
= 0;
928 handle
->page_offset
+= count
;
930 handle
->offset
+= count
;
/*
 * snapshot_image_loaded - check the state of @handle against the page
 * counters.
 *
 * Returns nonzero only when all of the following hold:
 *   - handle->pbe is non-NULL,
 *   - handle->pbe->next is NULL,
 *   - nr_copy_pages is nonzero,
 *   - handle->page is greater than nr_meta_pages + nr_copy_pages.
 * Returns zero if any of them fails.
 *
 * NOTE(review): presumably this means the whole image has been written
 * through snapshot_write_next() - confirm against the callers.
 */
934 int snapshot_image_loaded(struct snapshot_handle
*handle
)
936 return !(!handle
->pbe
|| handle
->pbe
->next
|| !nr_copy_pages
||
937 handle
->page
<= nr_meta_pages
+ nr_copy_pages
);