2 * linux/kernel/power/snapshot.c
4 * This file provides system snapshot/restore functionality.
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
8 * This file is released under the GPLv2, and is based on swsusp.c.
13 #include <linux/version.h>
14 #include <linux/module.h>
16 #include <linux/suspend.h>
17 #include <linux/smp_lock.h>
18 #include <linux/delay.h>
19 #include <linux/bitops.h>
20 #include <linux/spinlock.h>
21 #include <linux/kernel.h>
23 #include <linux/device.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
29 #include <asm/uaccess.h>
30 #include <asm/mmu_context.h>
31 #include <asm/pgtable.h>
32 #include <asm/tlbflush.h>
/* Head of the PBE (page backup entry) list describing the image;
 * set by swsusp_save()/load_header() and cleared by swsusp_free(). */
37 struct pbe
*pagedir_nosave
;
/* Number of image data pages copied during suspend */
38 static unsigned int nr_copy_pages
;
/* Number of pages holding PBE metadata (one long per data page) */
39 static unsigned int nr_meta_pages
;
/* One-page scratch buffer used by snapshot_read_next() and
 * snapshot_write_next(); per the comment there it is freed by
 * swsusp_free(). */
40 static unsigned long *buffer
;
/*
 * count_highmem_pages - count saveable pages in highmem zones.
 * Appears to skip PageReserved and PageNosaveFree pages -- confirm
 * against the full source; mark_free_pages() presumably flags free
 * pages with PG_nosave_free first.
 * NOTE(review): garbled/truncated extract -- the zone loop header,
 * the skip/count statements and the return are not visible here.
 */
43 unsigned int count_highmem_pages(void)
46 unsigned long zone_pfn
;
50 if (is_highmem(zone
)) {
51 mark_free_pages(zone
);
52 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; zone_pfn
++) {
54 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
57 page
= pfn_to_page(pfn
);
58 if (PageReserved(page
))
60 if (PageNosaveFree(page
))
/* Tail of struct highmem_page (its opening and other fields -- the
 * 'data' and 'page' members referenced below -- are not visible in
 * this extract); 'next' chains saved highmem pages together. */
71 struct highmem_page
*next
;
/* Head of the singly-linked list of saved highmem page copies */
74 static struct highmem_page
*highmem_copy
;
/*
 * save_highmem_zone - copy each saveable page of a highmem zone into
 * a freshly allocated highmem_page entry pushed onto highmem_copy.
 * NOTE(review): garbled extract -- allocation-failure handling, the
 * 'continue' statements after the page tests, and the return value
 * are not visible here.
 */
76 static int save_highmem_zone(struct zone
*zone
)
78 unsigned long zone_pfn
;
79 mark_free_pages(zone
);
80 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
82 struct highmem_page
*save
;
84 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
90 page
= pfn_to_page(pfn
);
/*
92 * This condition results from rvmalloc() sans vmalloc_32()
93 * and architectural memory reservations. This should be
94 * corrected eventually when the cases giving rise to this
95 * are better understood.
 */
97 if (PageReserved(page
))
99 BUG_ON(PageNosave(page
));
100 if (PageNosaveFree(page
))
/* GFP_ATOMIC: the allocation must not sleep here */
102 save
= kmalloc(sizeof(struct highmem_page
), GFP_ATOMIC
);
/* Push the new entry on the front of the highmem_copy list */
105 save
->next
= highmem_copy
;
107 save
->data
= (void *) get_zeroed_page(GFP_ATOMIC
);
/* Copy the page contents through a temporary atomic kmap */
112 kaddr
= kmap_atomic(page
, KM_USER0
);
113 memcpy(save
->data
, kaddr
, PAGE_SIZE
);
114 kunmap_atomic(kaddr
, KM_USER0
);
/*
 * save_highmem - save all saveable highmem pages by invoking
 * save_highmem_zone() for every highmem zone.
 * NOTE(review): truncated extract -- the declarations of 'zone' and
 * 'res', error propagation, and the final return are not visible.
 */
120 int save_highmem(void)
125 pr_debug("swsusp: Saving Highmem");
127 for_each_zone (zone
) {
128 if (is_highmem(zone
))
129 res
= save_highmem_zone(zone
);
/*
 * restore_highmem - copy the saved data back into the original
 * highmem pages and dismantle the highmem_copy list built during
 * suspend, freeing each buffer page as it goes.
 */
137 int restore_highmem(void)
139 printk("swsusp: Restoring Highmem\n");
140 while (highmem_copy
) {
141 struct highmem_page
*save
= highmem_copy
;
/* Unlink the head entry before restoring its contents */
143 highmem_copy
= save
->next
;
145 kaddr
= kmap_atomic(save
->page
, KM_USER0
);
146 memcpy(kaddr
, save
->data
, PAGE_SIZE
);
147 kunmap_atomic(kaddr
, KM_USER0
);
/* Release the buffer page; the kfree(save) that should follow is
 * not visible in this extract -- confirm against the full source */
148 free_page((long) save
->data
);
/*
 * Stub variants (presumably the configuration without highmem
 * support -- the surrounding #ifdef is not visible here): with no
 * highmem there is nothing to count, save, or restore, so each of
 * these trivially reports zero / success.
 */
static inline unsigned int count_highmem_pages(void)
{
	return 0;
}

static inline int save_highmem(void)
{
	return 0;
}

static inline int restore_highmem(void)
{
	return 0;
}
159 static int pfn_is_nosave(unsigned long pfn
)
161 unsigned long nosave_begin_pfn
= __pa(&__nosave_begin
) >> PAGE_SHIFT
;
162 unsigned long nosave_end_pfn
= PAGE_ALIGN(__pa(&__nosave_end
)) >> PAGE_SHIFT
;
163 return (pfn
>= nosave_begin_pfn
) && (pfn
< nosave_end_pfn
);
/**
167 * saveable - Determine whether a page should be cloned or not.
 *
170 * We save a page if it's Reserved, and not in the range of pages
171 * statically defined as 'unsaveable', or if it isn't reserved, and
172 * isn't part of a free chunk of pages.
 */
175 static int saveable(struct zone
*zone
, unsigned long *zone_pfn
)
177 unsigned long pfn
= *zone_pfn
+ zone
->zone_start_pfn
;
183 page
= pfn_to_page(pfn
);
/* A page must never be both reserved and marked nosave */
184 BUG_ON(PageReserved(page
) && PageNosave(page
));
/* NOTE(review): the return statements following each of the three
 * tests below (and the final return) are missing from this extract */
185 if (PageNosave(page
))
187 if (PageReserved(page
) && pfn_is_nosave(pfn
))
189 if (PageNosaveFree(page
))
/*
 * count_data_pages - count saveable non-highmem pages by summing
 * saveable() over every zone's pfn range; highmem zones are skipped
 * here (presumably handled by count_highmem_pages() -- confirm).
 * NOTE(review): truncated extract -- the declarations of 'zone' and
 * 'n', the skip statement, and the final return are not visible.
 */
195 unsigned int count_data_pages(void)
198 unsigned long zone_pfn
;
201 for_each_zone (zone
) {
202 if (is_highmem(zone
))
204 mark_free_pages(zone
);
205 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
206 n
+= saveable(zone
, &zone_pfn
);
/*
 * copy_data_pages - copy every saveable page into the data pages of
 * the PBE list @pblist, recording each source address in
 * pbe->orig_address.  The PBE pages and data pages are flagged with
 * PG_nosave_free so swsusp_free() can locate them later.
 * NOTE(review): garbled extract -- the pbe-advance step, the word
 * copy inside the final loop, and several braces are not visible.
 */
211 static void copy_data_pages(struct pbe
*pblist
)
214 unsigned long zone_pfn
;
218 for_each_zone (zone
) {
219 if (is_highmem(zone
))
221 mark_free_pages(zone
);
222 /* This is necessary for swsusp_free() */
223 for_each_pb_page (p
, pblist
)
224 SetPageNosaveFree(virt_to_page(p
));
225 for_each_pbe (p
, pblist
)
226 SetPageNosaveFree(virt_to_page(p
->address
));
227 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
228 if (saveable(zone
, &zone_pfn
)) {
233 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
235 pbe
->orig_address
= (unsigned long)page_address(page
);
236 /* copy_page and memcpy are not usable for copying task structs. */
/* Hence the explicit word-by-word copy loop below */
237 dst
= (long *)pbe
->address
;
238 src
= (long *)pbe
->orig_address
;
239 for (n
= PAGE_SIZE
/ sizeof(long); n
; n
--)
/**
250 * free_pagedir - free pages allocated with alloc_pagedir()
 *
 * For each page in the chain, clears PG_nosave (and PG_nosave_free
 * when @clear_nosave_free is set) before freeing the page.
 * NOTE(review): the enclosing loop over the chain (and the step that
 * advances pblist to pbe) is not visible in this extract.
 */
253 static void free_pagedir(struct pbe
*pblist
, int clear_nosave_free
)
/* The next page of the chain is linked from the page's last slot */
258 pbe
= (pblist
+ PB_PAGE_SKIP
)->next
;
259 ClearPageNosave(virt_to_page(pblist
));
260 if (clear_nosave_free
)
261 ClearPageNosaveFree(virt_to_page(pblist
));
262 free_page((unsigned long)pblist
);
/**
268 * fill_pb_page - Create a list of PBEs on a given memory page
 *
 * Links the consecutive pbe slots of one page via their ->next
 * pointers, stopping at the PB_PAGE_SKIP boundary (the final slot
 * links pages together instead).
 * NOTE(review): the loop body and the initialization of 'p' are not
 * visible in this extract.
 */
271 static inline void fill_pb_page(struct pbe
*pbpage
)
276 pbpage
+= PB_PAGE_SKIP
;
279 while (++p
< pbpage
);
/**
283 * create_pbe_list - Create a list of PBEs on top of a given chain
284 * of memory pages allocated with alloc_pagedir()
 */
287 static inline void create_pbe_list(struct pbe
*pblist
, unsigned int nr_pages
)
289 struct pbe
*pbpage
, *p
;
290 unsigned int num
= PBES_PER_PAGE
;
292 for_each_pb_page (pbpage
, pblist
) {
/* Fill each full page with a linked run of PBEs */
296 fill_pb_page(pbpage
);
297 num
+= PBES_PER_PAGE
;
/* Presumably links/terminates the PBEs of the last, partially used
 * page -- the loop body is not visible in this extract */
300 for (num
-= PBES_PER_PAGE
- 1, p
= pbpage
; num
< nr_pages
; p
++, num
++)
/* Count of allocated-but-unsafe pages (see alloc_image_page below) */
306 static unsigned int unsafe_pages
;
/**
 * alloc_image_page - allocate one zeroed page for the image.
 *
309 * @safe_needed - on resume, for storing the PBE list and the image,
310 * we can only use memory pages that do not conflict with the pages
311 * used before suspend.
 *
313 * The unsafe pages are marked with the PG_nosave_free flag
314 * and we count them using unsafe_pages
 */
317 static inline void *alloc_image_page(gfp_t gfp_mask
, int safe_needed
)
321 res
= (void *)get_zeroed_page(gfp_mask
);
/* Retry until a page not flagged PG_nosave_free turns up; rejected
 * (unsafe) pages stay allocated, marked for release by
 * swsusp_free().  NOTE(review): the safe_needed test guarding this
 * loop is not visible in this extract. */
323 while (res
&& PageNosaveFree(virt_to_page(res
))) {
324 /* The page is unsafe, mark it for swsusp_free() */
325 SetPageNosave(virt_to_page(res
));
327 res
= (void *)get_zeroed_page(gfp_mask
);
/* Mark the finally accepted page as belonging to the image */
330 SetPageNosave(virt_to_page(res
));
331 SetPageNosaveFree(virt_to_page(res
));
336 unsigned long get_safe_page(gfp_t gfp_mask
)
338 return (unsigned long)alloc_image_page(gfp_mask
, 1);
/**
342 * alloc_pagedir - Allocate the page directory.
 *
344 * First, determine exactly how many pages we need and
 * allocate them.
 *
347 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
348 * struct pbe elements (pbes) and the last element in the page points
 * to the next page.
 *
351 * On each page we set up a list of struct_pbe elements.
 */
354 static struct pbe
*alloc_pagedir(unsigned int nr_pages
, gfp_t gfp_mask
,
/* NOTE(review): the 'int safe_needed' parameter line is missing from
 * this extract (safe_needed is referenced below). */
358 struct pbe
*pblist
, *pbe
;
363 pblist
= alloc_image_page(gfp_mask
, safe_needed
);
364 /* FIXME: rewrite this ugly loop */
365 for (pbe
= pblist
, num
= PBES_PER_PAGE
; pbe
&& num
< nr_pages
;
366 pbe
= pbe
->next
, num
+= PBES_PER_PAGE
) {
/* Chain the next page off this page's last PBE slot */
368 pbe
->next
= alloc_image_page(gfp_mask
, safe_needed
);
/* On failure, release everything allocated so far */
370 if (!pbe
) { /* get_zeroed_page() failed */
371 free_pagedir(pblist
, 1);
/* Wire up the per-page PBE lists across the chain */
374 create_pbe_list(pblist
, nr_pages
);
/**
 * swsusp_free - release every page that belongs to the image.
 *
379 * Free pages we allocated for suspend. Suspend pages are allocated
380 * before atomic copy, so we need to free them after resume.
 */
383 void swsusp_free(void)
386 unsigned long zone_pfn
;
388 for_each_zone(zone
) {
389 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
390 if (pfn_valid(zone_pfn
+ zone
->zone_start_pfn
)) {
392 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
/* Pages with both PG_nosave and PG_nosave_free set are ours */
393 if (PageNosave(page
) && PageNosaveFree(page
)) {
394 ClearPageNosave(page
);
395 ClearPageNosaveFree(page
);
396 free_page((long) page_address(page
));
/* Reset the image bookkeeping */
402 pagedir_nosave
= NULL
;
/**
408 * enough_free_mem - Make sure we have enough free memory to snapshot.
 *
410 * Returns TRUE or FALSE after checking the number of available
 * free pages.
 */
414 static int enough_free_mem(unsigned int nr_pages
)
/* NOTE(review): the declarations and the for_each_zone header are
 * not visible in this extract. */
420 if (!is_highmem(zone
))
421 n
+= zone
->free_pages
;
422 pr_debug("swsusp: available memory: %u pages\n", n
);
/* Need room for the data pages, I/O, and the PBE metadata pages */
423 return n
> (nr_pages
+ PAGES_FOR_IO
+
424 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
);
/*
 * alloc_data_pages - allocate one image data page for every PBE in
 * @pblist, honoring @safe_needed (see alloc_image_page()).
 * NOTE(review): the allocation-failure check and the return
 * statements are not visible in this extract.
 */
427 static int alloc_data_pages(struct pbe
*pblist
, gfp_t gfp_mask
, int safe_needed
)
431 for_each_pbe (p
, pblist
) {
432 p
->address
= (unsigned long)alloc_image_page(gfp_mask
, safe_needed
);
/*
 * swsusp_alloc - allocate the page directory plus the image data
 * pages for @nr_pages; returns the PBE list head (presumably NULL on
 * failure -- the error-path returns are not visible in this extract).
 */
439 static struct pbe
*swsusp_alloc(unsigned int nr_pages
)
443 if (!(pblist
= alloc_pagedir(nr_pages
, GFP_ATOMIC
| __GFP_COLD
, 0))) {
444 printk(KERN_ERR
"suspend: Allocating pagedir failed.\n");
448 if (alloc_data_pages(pblist
, GFP_ATOMIC
| __GFP_COLD
, 0)) {
449 printk(KERN_ERR
"suspend: Allocating image pages failed.\n");
/*
 * swsusp_save - create the suspend image inside the critical section:
 * count the saveable pages, verify that enough memory is free,
 * allocate the pagedir plus data pages, and copy everything in.
 * NOTE(review): garbled extract -- error returns, the drain of cold
 * pages, and the final return are not visible here.
 */
457 asmlinkage
int swsusp_save(void)
459 unsigned int nr_pages
;
461 pr_debug("swsusp: critical section: \n");
464 nr_pages
= count_data_pages();
465 printk("swsusp: Need to copy %u pages\n", nr_pages
);
467 pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
469 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
,
470 PAGES_FOR_IO
, nr_free_pages());
472 if (!enough_free_mem(nr_pages
)) {
473 printk(KERN_ERR
"swsusp: Not enough free memory\n");
477 pagedir_nosave
= swsusp_alloc(nr_pages
);
481 /* During allocating of suspend pagedir, new cold pages may appear.
 * Kill them. */
485 copy_data_pages(pagedir_nosave
);
/*
488 * End of critical section. From now on, we can write to memory,
489 * but we should not touch disk. This specially means we must _not_
490 * touch swap space! Except we must write out our image of course.
 */
493 nr_copy_pages
= nr_pages
;
/* One long (the orig address) is stored per data page; round up */
494 nr_meta_pages
= (nr_pages
* sizeof(long) + PAGE_SIZE
- 1) >> PAGE_SHIFT
;
496 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages
);
/*
 * init_header - zero @info and fill in the swsusp_info header that
 * describes the image: kernel version, physical memory size, utsname,
 * online CPU count, and the page counts/size of the image.
 */
500 static void init_header(struct swsusp_info
*info
)
502 memset(info
, 0, sizeof(struct swsusp_info
));
503 info
->version_code
= LINUX_VERSION_CODE
;
504 info
->num_physpages
= num_physpages
;
505 memcpy(&info
->uts
, &system_utsname
, sizeof(system_utsname
));
506 info
->cpus
= num_online_cpus();
507 info
->image_pages
= nr_copy_pages
;
/* Total stream length: data pages + metadata pages + this header */
508 info
->pages
= nr_copy_pages
+ nr_meta_pages
+ 1;
509 info
->size
= info
->pages
;
510 info
->size
<<= PAGE_SHIFT
;
/**
514 * pack_orig_addresses - the .orig_address fields of the PBEs from the
515 * list starting at @pbe are stored in the array @buf[] (1 page)
 *
 * Presumably returns the first PBE that did not fit into @buf and
 * zeroes the unused tail entries -- the return statement and the
 * bodies that would confirm this are not visible in this extract.
 */
518 static inline struct pbe
*pack_orig_addresses(unsigned long *buf
, struct pbe
*pbe
)
522 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
523 buf
[j
] = pbe
->orig_address
;
/* NOTE(review): the pbe = pbe->next advance is missing here */
527 for (; j
< PAGE_SIZE
/ sizeof(long); j
++)
/**
533 * snapshot_read_next - used for reading the system memory snapshot.
 *
535 * On the first call to it @handle should point to a zeroed
536 * snapshot_handle structure. The structure gets updated and a pointer
537 * to it should be passed to this function every next time.
 *
539 * The @count parameter should contain the number of bytes the caller
540 * wants to read from the snapshot. It must not be zero.
 *
542 * On success the function returns a positive number. Then, the caller
543 * is allowed to read up to the returned number of bytes from the memory
544 * location computed by the data_of() macro. The number returned
545 * may be smaller than @count, but this only happens if the read would
546 * cross a page boundary otherwise.
 *
548 * The function returns 0 to indicate the end of data stream condition,
549 * and a negative number is returned on error. In such cases the
550 * structure pointed to by @handle is not updated and should not be used
 * any more.
 *
 * NOTE(review): garbled extract -- several statements (page counter
 * increments, error checks, the return of @count) are not visible.
 */
554 int snapshot_read_next(struct snapshot_handle
*handle
, size_t count
)
/* Past header + metadata + data pages: end of the data stream */
556 if (handle
->page
> nr_meta_pages
+ nr_copy_pages
)
559 /* This makes the buffer be freed by swsusp_free() */
560 buffer
= alloc_image_page(GFP_ATOMIC
, 0);
/* First call: emit the header and start walking the PBE list */
564 if (!handle
->offset
) {
565 init_header((struct swsusp_info
*)buffer
);
566 handle
->buffer
= buffer
;
567 handle
->pbe
= pagedir_nosave
;
569 if (handle
->prev
< handle
->page
) {
/* Metadata pages: pack the original addresses into the buffer */
570 if (handle
->page
<= nr_meta_pages
) {
571 handle
->pbe
= pack_orig_addresses(buffer
, handle
->pbe
);
573 handle
->pbe
= pagedir_nosave
;
/* Data pages: hand out the copied page itself, no extra copy */
575 handle
->buffer
= (void *)handle
->pbe
->address
;
576 handle
->pbe
= handle
->pbe
->next
;
578 handle
->prev
= handle
->page
;
580 handle
->buf_offset
= handle
->page_offset
;
/* Clamp the chunk so a read never crosses a page boundary */
581 if (handle
->page_offset
+ count
>= PAGE_SIZE
) {
582 count
= PAGE_SIZE
- handle
->page_offset
;
583 handle
->page_offset
= 0;
586 handle
->page_offset
+= count
;
588 handle
->offset
+= count
;
/**
593 * mark_unsafe_pages - mark the pages that cannot be used for storing
594 * the image during resume, because they conflict with the pages that
595 * had been used before suspend
 *
 * Clears PG_nosave_free everywhere, then sets it on each page frame
 * named by a PBE orig_address.  NOTE(review): the return statements
 * and the reset of unsafe_pages are not visible in this extract.
 */
598 static int mark_unsafe_pages(struct pbe
*pblist
)
601 unsigned long zone_pfn
;
604 if (!pblist
) /* a sanity check */
607 /* Clear page flags */
608 for_each_zone (zone
) {
609 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
610 if (pfn_valid(zone_pfn
+ zone
->zone_start_pfn
))
611 ClearPageNosaveFree(pfn_to_page(zone_pfn
+
612 zone
->zone_start_pfn
));
615 /* Mark orig addresses */
616 for_each_pbe (p
, pblist
) {
617 if (virt_addr_valid(p
->orig_address
))
618 SetPageNosaveFree(virt_to_page(p
->orig_address
));
/*
 * copy_page_backup_list - copy the orig_address of each PBE in @src
 * into the corresponding PBE of @dst.
 * NOTE(review): the loop advancing both lists is not visible in this
 * extract.
 */
628 static void copy_page_backup_list(struct pbe
*dst
, struct pbe
*src
)
630 /* We assume both lists contain the same number of elements */
632 dst
->orig_address
= src
->orig_address
;
/*
 * check_header - verify that the image header matches the running
 * kernel: version code, physical memory size, and the utsname
 * sysname/release/version/machine fields.  On mismatch the offending
 * field name is reported via printk.
 * NOTE(review): the initialization of 'reason', the error return,
 * and two of the 'reason' assignments are not visible here.
 */
638 static int check_header(struct swsusp_info
*info
)
642 if (info
->version_code
!= LINUX_VERSION_CODE
)
643 reason
= "kernel version";
644 if (info
->num_physpages
!= num_physpages
)
645 reason
= "memory size";
646 if (strcmp(info
->uts
.sysname
,system_utsname
.sysname
))
647 reason
= "system type";
648 if (strcmp(info
->uts
.release
,system_utsname
.release
))
649 reason
= "kernel release";
650 if (strcmp(info
->uts
.version
,system_utsname
.version
))
652 if (strcmp(info
->uts
.machine
,system_utsname
.machine
))
655 printk(KERN_ERR
"swsusp: Resume mismatch: %s\n", reason
);
/**
662 * load header - check the image header and copy data from it
 *
 * Validates @info via check_header(), then allocates a PBE list
 * sized for the image and seeds the global/handle state from the
 * header's page counts.
 */
665 static int load_header(struct snapshot_handle
*handle
,
666 struct swsusp_info
*info
)
671 error
= check_header(info
);
673 pblist
= alloc_pagedir(info
->image_pages
, GFP_ATOMIC
, 0);
676 pagedir_nosave
= pblist
;
677 handle
->pbe
= pblist
;
678 nr_copy_pages
= info
->image_pages
;
/* One page is the header itself; the rest before the data is
 * metadata */
679 nr_meta_pages
= info
->pages
- info
->image_pages
- 1;
/**
685 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
686 * the PBEs in the list starting at @pbe
 */
689 static inline struct pbe
*unpack_orig_addresses(unsigned long *buf
,
/* NOTE(review): the 'struct pbe *pbe' parameter line, the list
 * advance, and the return statement are missing from this extract. */
694 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
695 pbe
->orig_address
= buf
[j
];
/**
702 * prepare_image - use metadata contained in the PBE list
703 * pointed to by pagedir_nosave to mark the pages that will
704 * be overwritten in the process of restoring the system
705 * memory state from the image ("unsafe" pages) and allocate
706 * memory for the image
 *
708 * The idea is to allocate the PBE list first and then
709 * allocate as many pages as it's needed for the image data,
710 * but not to assign these pages to the PBEs initially.
711 * Instead, we just mark them as allocated and create a list
712 * of "safe" which will be used later
 */
/* A whole page frame usable as a list node; 'next' chains the free
 * "safe" pages, padding fills the page.  NOTE(review): the struct's
 * opening line is not visible in this extract. */
716 struct safe_page
*next
;
717 char padding
[PAGE_SIZE
- sizeof(void *)];
/* Head of the "safe" pages list built by prepare_image() */
720 static struct safe_page
*safe_pages
;
/* See the prepare_image kerneldoc above for the overall strategy.
 * NOTE(review): garbled extract -- the initialization of 'p', the
 * error checks, the allocation loop header, and the returns are not
 * visible here. */
722 static int prepare_image(struct snapshot_handle
*handle
)
725 unsigned int nr_pages
= nr_copy_pages
;
726 struct pbe
*p
, *pblist
= NULL
;
/* Flag the frames the image data will be written back to */
729 error
= mark_unsafe_pages(p
);
/* safe_needed == 1: the new PBE list must avoid those frames */
731 pblist
= alloc_pagedir(nr_pages
, GFP_ATOMIC
, 1);
733 copy_page_backup_list(pblist
, p
);
/* Reserve frames for the image data: "safe" ones go on safe_pages,
 * unsafe ones just stay allocated until swsusp_free() */
739 if (!error
&& nr_pages
> unsafe_pages
) {
740 nr_pages
-= unsafe_pages
;
742 struct safe_page
*ptr
;
744 ptr
= (struct safe_page
*)get_zeroed_page(GFP_ATOMIC
);
749 if (!PageNosaveFree(virt_to_page(ptr
))) {
750 /* The page is "safe", add it to the list */
751 ptr
->next
= safe_pages
;
754 /* Mark the page as allocated */
755 SetPageNosave(virt_to_page(ptr
));
756 SetPageNosaveFree(virt_to_page(ptr
));
760 pagedir_nosave
= pblist
;
/*
 * get_buffer - return the frame the next image page should be read
 * into: the "original" frame itself if it was successfully allocated
 * (both PG_nosave and PG_nosave_free set), otherwise a frame popped
 * from the safe_pages list.
 * NOTE(review): garbled extract -- the statement guarded by the
 * 'last && last->next' test is not visible here.
 */
768 static void *get_buffer(struct snapshot_handle
*handle
)
770 struct pbe
*pbe
= handle
->pbe
, *last
= handle
->last_pbe
;
771 struct page
*page
= virt_to_page(pbe
->orig_address
);
773 if (PageNosave(page
) && PageNosaveFree(page
)) {
/*
775 * We have allocated the "original" page frame and we can
776 * use it directly to store the read page
 */
779 if (last
&& last
->next
)
781 return (void *)pbe
->orig_address
;
/*
784 * The "original" page frame has not been allocated and we have to
785 * use a "safe" page frame to store the read page
 */
787 pbe
->address
= (unsigned long)safe_pages
;
/* Pop the head of the safe-pages list */
788 safe_pages
= safe_pages
->next
;
791 handle
->last_pbe
= pbe
;
792 return (void *)pbe
->address
;
/**
796 * snapshot_write_next - used for writing the system memory snapshot.
 *
798 * On the first call to it @handle should point to a zeroed
799 * snapshot_handle structure. The structure gets updated and a pointer
800 * to it should be passed to this function every next time.
 *
802 * The @count parameter should contain the number of bytes the caller
803 * wants to write to the image. It must not be zero.
 *
805 * On success the function returns a positive number. Then, the caller
806 * is allowed to write up to the returned number of bytes to the memory
807 * location computed by the data_of() macro. The number returned
808 * may be smaller than @count, but this only happens if the write would
809 * cross a page boundary otherwise.
 *
811 * The function returns 0 to indicate the "end of file" condition,
812 * and a negative number is returned on error. In such cases the
813 * structure pointed to by @handle is not updated and should not be used
 * any more.
 *
 * NOTE(review): garbled extract -- error returns, page counter
 * increments, and the return of @count are not visible here.
 */
817 int snapshot_write_next(struct snapshot_handle
*handle
, size_t count
)
/* Past header + metadata + data pages: nothing more to accept */
821 if (handle
->prev
&& handle
->page
> nr_meta_pages
+ nr_copy_pages
)
824 /* This makes the buffer be freed by swsusp_free() */
825 buffer
= alloc_image_page(GFP_ATOMIC
, 0);
830 handle
->buffer
= buffer
;
831 if (handle
->prev
< handle
->page
) {
/* The very first page of the stream is the image header */
833 error
= load_header(handle
, (struct swsusp_info
*)buffer
);
836 } else if (handle
->prev
<= nr_meta_pages
) {
/* Metadata pages: unpack original addresses into the PBE list */
837 handle
->pbe
= unpack_orig_addresses(buffer
, handle
->pbe
);
/* After the last metadata page, set up frames for the data */
839 error
= prepare_image(handle
);
842 handle
->pbe
= pagedir_nosave
;
843 handle
->last_pbe
= NULL
;
844 handle
->buffer
= get_buffer(handle
);
847 handle
->pbe
= handle
->pbe
->next
;
848 handle
->buffer
= get_buffer(handle
);
850 handle
->prev
= handle
->page
;
852 handle
->buf_offset
= handle
->page_offset
;
/* Clamp the chunk so a write never crosses a page boundary */
853 if (handle
->page_offset
+ count
>= PAGE_SIZE
) {
854 count
= PAGE_SIZE
- handle
->page_offset
;
855 handle
->page_offset
= 0;
858 handle
->page_offset
+= count
;
860 handle
->offset
+= count
;
864 int snapshot_image_loaded(struct snapshot_handle
*handle
)
866 return !(!handle
->pbe
|| handle
->pbe
->next
|| !nr_copy_pages
||
867 handle
->page
<= nr_meta_pages
+ nr_copy_pages
);