2 * linux/kernel/power/swsusp.c
4 * This file is to realize architecture-independent
5 * machine suspend feature using pretty near only high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
10 * This file is released under the GPLv2.
12 * I'd like to thank the following people for their work:
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
29 * Andreas Mohr <a.mohr@mailto.de>
31 * Alex Badea <vampire@go.ro>:
34 * More state savers are welcome. Especially for the scsi layer...
36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39 #include <linux/module.h>
41 #include <linux/suspend.h>
42 #include <linux/smp_lock.h>
43 #include <linux/file.h>
44 #include <linux/utsname.h>
45 #include <linux/version.h>
46 #include <linux/delay.h>
47 #include <linux/reboot.h>
48 #include <linux/bitops.h>
49 #include <linux/vt_kern.h>
50 #include <linux/kbd_kern.h>
51 #include <linux/keyboard.h>
52 #include <linux/spinlock.h>
53 #include <linux/genhd.h>
54 #include <linux/kernel.h>
55 #include <linux/major.h>
56 #include <linux/swap.h>
58 #include <linux/device.h>
59 #include <linux/buffer_head.h>
60 #include <linux/swapops.h>
61 #include <linux/bootmem.h>
62 #include <linux/syscalls.h>
63 #include <linux/console.h>
64 #include <linux/highmem.h>
65 #include <linux/bio.h>
66 #include <linux/mount.h>
68 #include <asm/uaccess.h>
69 #include <asm/mmu_context.h>
70 #include <asm/pgtable.h>
71 #include <asm/tlbflush.h>
76 /* References to section boundaries */
77 extern const void __nosave_begin
, __nosave_end
;
79 /* Variables to be preserved over suspend */
80 static int nr_copy_pages_check
;
82 extern char resume_file
[];
84 /* Local variables that should not be affected by save */
85 static unsigned int nr_copy_pages __nosavedata
= 0;
87 /* Suspend pagedir is allocated before final copy, therefore it
88 must be freed after resume
90 Warning: this is evil. There are actually two pagedirs at time of
91 resume. One is "pagedir_save", which is empty frame allocated at
92 time of suspend, that must be freed. Second is "pagedir_nosave",
93 allocated at time of resume, that travels through memory not to
94 collide with anything.
96 Warning: this is even more evil than it seems. Pagedirs this file
97 talks about are completely different from page directories used by
100 suspend_pagedir_t
*pagedir_nosave __nosavedata
= NULL
;
101 static suspend_pagedir_t
*pagedir_save
;
103 #define SWSUSP_SIG "S1SUSPEND"
105 static struct swsusp_header
{
106 char reserved
[PAGE_SIZE
- 20 - sizeof(swp_entry_t
)];
107 swp_entry_t swsusp_info
;
110 } __attribute__((packed
, aligned(PAGE_SIZE
))) swsusp_header
;
112 static struct swsusp_info swsusp_info
;
115 * XXX: We try to keep some more pages free so that I/O operations succeed
116 * without paging. Might this be more?
118 #define PAGES_FOR_IO 512
124 /* We memorize in swapfile_used what swap devices are used for suspension */
125 #define SWAPFILE_UNUSED 0
126 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
127 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
129 static unsigned short swapfile_used
[MAX_SWAPFILES
];
130 static unsigned short root_swap
;
132 static int mark_swapfiles(swp_entry_t prev
)
136 rw_swap_page_sync(READ
,
137 swp_entry(root_swap
, 0),
138 virt_to_page((unsigned long)&swsusp_header
));
139 if (!memcmp("SWAP-SPACE",swsusp_header
.sig
, 10) ||
140 !memcmp("SWAPSPACE2",swsusp_header
.sig
, 10)) {
141 memcpy(swsusp_header
.orig_sig
,swsusp_header
.sig
, 10);
142 memcpy(swsusp_header
.sig
,SWSUSP_SIG
, 10);
143 swsusp_header
.swsusp_info
= prev
;
144 error
= rw_swap_page_sync(WRITE
,
145 swp_entry(root_swap
, 0),
146 virt_to_page((unsigned long)
149 pr_debug("swsusp: Partition is not swap space.\n");
156 * Check whether the swap device is the specified resume
157 * device, irrespective of whether they are specified by
160 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
161 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
162 * and they'll be considered the same device. This is *necessary* for
163 * devfs, since the resume code can only recognize the form /dev/hda4,
164 * but the suspend code would see the long name.)
166 static int is_resume_device(const struct swap_info_struct
*swap_info
)
168 struct file
*file
= swap_info
->swap_file
;
169 struct inode
*inode
= file
->f_dentry
->d_inode
;
171 return S_ISBLK(inode
->i_mode
) &&
172 swsusp_resume_device
== MKDEV(imajor(inode
), iminor(inode
));
175 static int swsusp_swap_check(void) /* This is called before saving image */
179 len
=strlen(resume_file
);
183 for (i
=0; i
<MAX_SWAPFILES
; i
++) {
184 if (swap_info
[i
].flags
== 0) {
185 swapfile_used
[i
]=SWAPFILE_UNUSED
;
188 printk(KERN_WARNING
"resume= option should be used to set suspend device" );
189 if (root_swap
== 0xFFFF) {
190 swapfile_used
[i
] = SWAPFILE_SUSPEND
;
193 swapfile_used
[i
] = SWAPFILE_IGNORED
;
195 /* we ignore all swap devices that are not the resume_file */
196 if (is_resume_device(&swap_info
[i
])) {
197 swapfile_used
[i
] = SWAPFILE_SUSPEND
;
200 swapfile_used
[i
] = SWAPFILE_IGNORED
;
206 return (root_swap
!= 0xffff) ? 0 : -ENODEV
;
210 * This is called after saving image so modification
211 * will be lost after resume... and that's what we want.
212 * we make the device unusable. A new call to
213 * lock_swapdevices can unlock the devices.
215 static void lock_swapdevices(void)
220 for (i
= 0; i
< MAX_SWAPFILES
; i
++)
221 if (swapfile_used
[i
] == SWAPFILE_IGNORED
) {
222 swap_info
[i
].flags
^= 0xFF;
 *	write_page - Write one page to a fresh swap location.
229 * @addr: Address we're writing.
230 * @loc: Place to store the entry we used.
232 * Allocate a new swap entry and 'sync' it. Note we discard -EIO
233 * errors. That is an artifact left over from swsusp. It did not
234 * check the return of rw_swap_page_sync() at all, since most pages
235 * written back to swap would return -EIO.
236 * This is a partial improvement, since we will at least return other
237 * errors, though we need to eventually fix the damn code.
239 static int write_page(unsigned long addr
, swp_entry_t
* loc
)
244 entry
= get_swap_page();
245 if (swp_offset(entry
) &&
246 swapfile_used
[swp_type(entry
)] == SWAPFILE_SUSPEND
) {
247 error
= rw_swap_page_sync(WRITE
, entry
,
259 * data_free - Free the swap entries used by the saved image.
261 * Walk the list of used swap entries and free each one.
262 * This is only used for cleanup when suspend fails.
264 static void data_free(void)
269 for (i
= 0; i
< nr_copy_pages
; i
++) {
270 entry
= (pagedir_nosave
+ i
)->swap_address
;
275 (pagedir_nosave
+ i
)->swap_address
= (swp_entry_t
){0};
280 * data_write - Write saved image to swap.
282 * Walk the list of pages in the image and sync each one to swap.
284 static int data_write(void)
286 int error
= 0, i
= 0;
287 unsigned int mod
= nr_copy_pages
/ 100;
293 printk( "Writing data to swap (%d pages)... ", nr_copy_pages
);
294 for_each_pbe (p
, pagedir_nosave
) {
296 printk( "\b\b\b\b%3d%%", i
/ mod
);
297 if ((error
= write_page(p
->address
, &(p
->swap_address
))))
301 printk("\b\b\b\bdone\n");
305 static void dump_info(void)
307 pr_debug(" swsusp: Version: %u\n",swsusp_info
.version_code
);
308 pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info
.num_physpages
);
309 pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info
.uts
.sysname
);
310 pr_debug(" swsusp: UTS Node: %s\n",swsusp_info
.uts
.nodename
);
311 pr_debug(" swsusp: UTS Release: %s\n",swsusp_info
.uts
.release
);
312 pr_debug(" swsusp: UTS Version: %s\n",swsusp_info
.uts
.version
);
313 pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info
.uts
.machine
);
314 pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info
.uts
.domainname
);
315 pr_debug(" swsusp: CPUs: %d\n",swsusp_info
.cpus
);
316 pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info
.image_pages
);
317 pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info
.pagedir_pages
);
320 static void init_header(void)
322 memset(&swsusp_info
, 0, sizeof(swsusp_info
));
323 swsusp_info
.version_code
= LINUX_VERSION_CODE
;
324 swsusp_info
.num_physpages
= num_physpages
;
325 memcpy(&swsusp_info
.uts
, &system_utsname
, sizeof(system_utsname
));
327 swsusp_info
.suspend_pagedir
= pagedir_nosave
;
328 swsusp_info
.cpus
= num_online_cpus();
329 swsusp_info
.image_pages
= nr_copy_pages
;
332 static int close_swap(void)
338 error
= write_page((unsigned long)&swsusp_info
, &entry
);
341 error
= mark_swapfiles(entry
);
348 * free_pagedir_entries - Free pages used by the page directory.
350 * This is used during suspend for error recovery.
353 static void free_pagedir_entries(void)
357 for (i
= 0; i
< swsusp_info
.pagedir_pages
; i
++)
358 swap_free(swsusp_info
.pagedir
[i
]);
363 * write_pagedir - Write the array of pages holding the page directory.
 *	The swap entries used are stored in swsusp_info.pagedir[] and their
 *	count in swsusp_info.pagedir_pages (needed for the header).
367 static int write_pagedir(void)
373 printk( "Writing pagedir...");
374 for_each_pb_page (pbe
, pagedir_nosave
) {
375 if ((error
= write_page((unsigned long)pbe
, &swsusp_info
.pagedir
[n
++])))
379 swsusp_info
.pagedir_pages
= n
;
380 printk("done (%u pages)\n", n
);
385 * write_suspend_image - Write entire image and metadata.
389 static int write_suspend_image(void)
394 if ((error
= data_write()))
397 if ((error
= write_pagedir()))
400 if ((error
= close_swap()))
405 free_pagedir_entries();
412 #ifdef CONFIG_HIGHMEM
413 struct highmem_page
{
416 struct highmem_page
*next
;
419 static struct highmem_page
*highmem_copy
;
421 static int save_highmem_zone(struct zone
*zone
)
423 unsigned long zone_pfn
;
424 mark_free_pages(zone
);
425 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
427 struct highmem_page
*save
;
429 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
435 page
= pfn_to_page(pfn
);
437 * This condition results from rvmalloc() sans vmalloc_32()
438 * and architectural memory reservations. This should be
439 * corrected eventually when the cases giving rise to this
440 * are better understood.
442 if (PageReserved(page
)) {
443 printk("highmem reserved page?!\n");
446 BUG_ON(PageNosave(page
));
447 if (PageNosaveFree(page
))
449 save
= kmalloc(sizeof(struct highmem_page
), GFP_ATOMIC
);
452 save
->next
= highmem_copy
;
454 save
->data
= (void *) get_zeroed_page(GFP_ATOMIC
);
459 kaddr
= kmap_atomic(page
, KM_USER0
);
460 memcpy(save
->data
, kaddr
, PAGE_SIZE
);
461 kunmap_atomic(kaddr
, KM_USER0
);
466 #endif /* CONFIG_HIGHMEM */
469 static int save_highmem(void)
471 #ifdef CONFIG_HIGHMEM
475 pr_debug("swsusp: Saving Highmem\n");
476 for_each_zone (zone
) {
477 if (is_highmem(zone
))
478 res
= save_highmem_zone(zone
);
486 static int restore_highmem(void)
488 #ifdef CONFIG_HIGHMEM
489 printk("swsusp: Restoring Highmem\n");
490 while (highmem_copy
) {
491 struct highmem_page
*save
= highmem_copy
;
493 highmem_copy
= save
->next
;
495 kaddr
= kmap_atomic(save
->page
, KM_USER0
);
496 memcpy(kaddr
, save
->data
, PAGE_SIZE
);
497 kunmap_atomic(kaddr
, KM_USER0
);
498 free_page((long) save
->data
);
506 static int pfn_is_nosave(unsigned long pfn
)
508 unsigned long nosave_begin_pfn
= __pa(&__nosave_begin
) >> PAGE_SHIFT
;
509 unsigned long nosave_end_pfn
= PAGE_ALIGN(__pa(&__nosave_end
)) >> PAGE_SHIFT
;
510 return (pfn
>= nosave_begin_pfn
) && (pfn
< nosave_end_pfn
);
514 * saveable - Determine whether a page should be cloned or not.
517 * We save a page if it's Reserved, and not in the range of pages
518 * statically defined as 'unsaveable', or if it isn't reserved, and
519 * isn't part of a free chunk of pages.
522 static int saveable(struct zone
* zone
, unsigned long * zone_pfn
)
524 unsigned long pfn
= *zone_pfn
+ zone
->zone_start_pfn
;
530 page
= pfn_to_page(pfn
);
531 BUG_ON(PageReserved(page
) && PageNosave(page
));
532 if (PageNosave(page
))
534 if (PageReserved(page
) && pfn_is_nosave(pfn
)) {
535 pr_debug("[nosave pfn 0x%lx]", pfn
);
538 if (PageNosaveFree(page
))
544 static void count_data_pages(void)
547 unsigned long zone_pfn
;
551 for_each_zone (zone
) {
552 if (is_highmem(zone
))
554 mark_free_pages(zone
);
555 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
556 nr_copy_pages
+= saveable(zone
, &zone_pfn
);
561 static void copy_data_pages(void)
564 unsigned long zone_pfn
;
565 struct pbe
* pbe
= pagedir_nosave
;
567 pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages
);
568 for_each_zone (zone
) {
569 if (is_highmem(zone
))
571 mark_free_pages(zone
);
572 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
573 if (saveable(zone
, &zone_pfn
)) {
575 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
577 pbe
->orig_address
= (long) page_address(page
);
578 /* copy_page is not usable for copying task structs. */
579 memcpy((void *)pbe
->address
, (void *)pbe
->orig_address
, PAGE_SIZE
);
589 * calc_nr - Determine the number of pages needed for a pbe list.
592 static int calc_nr(int nr_copy
)
595 int mod
= !!(nr_copy
% PBES_PER_PAGE
);
596 int diff
= (nr_copy
/ PBES_PER_PAGE
) + mod
;
601 mod
= !!(nr_copy
% PBES_PER_PAGE
);
602 diff
= (nr_copy
/ PBES_PER_PAGE
) + mod
- extra
;
609 * free_pagedir - free pages allocated with alloc_pagedir()
612 static inline void free_pagedir(struct pbe
*pblist
)
617 pbe
= (pblist
+ PB_PAGE_SKIP
)->next
;
618 free_page((unsigned long)pblist
);
624 * fill_pb_page - Create a list of PBEs on a given memory page
627 static inline void fill_pb_page(struct pbe
*pbpage
)
632 pbpage
+= PB_PAGE_SKIP
;
635 while (++p
< pbpage
);
639 * create_pbe_list - Create a list of PBEs on top of a given chain
640 * of memory pages allocated with alloc_pagedir()
643 static void create_pbe_list(struct pbe
*pblist
, unsigned nr_pages
)
645 struct pbe
*pbpage
, *p
;
646 unsigned num
= PBES_PER_PAGE
;
648 for_each_pb_page (pbpage
, pblist
) {
652 fill_pb_page(pbpage
);
653 num
+= PBES_PER_PAGE
;
656 for (num
-= PBES_PER_PAGE
- 1, p
= pbpage
; num
< nr_pages
; p
++, num
++)
660 pr_debug("create_pbe_list(): initialized %d PBEs\n", num
);
664 * alloc_pagedir - Allocate the page directory.
666 * First, determine exactly how many pages we need and
669 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
670 * struct pbe elements (pbes) and the last element in the page points
673 * On each page we set up a list of struct_pbe elements.
676 static struct pbe
* alloc_pagedir(unsigned nr_pages
)
679 struct pbe
*pblist
, *pbe
;
684 pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages
);
685 pblist
= (struct pbe
*)get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
686 for (pbe
= pblist
, num
= PBES_PER_PAGE
; pbe
&& num
< nr_pages
;
687 pbe
= pbe
->next
, num
+= PBES_PER_PAGE
) {
689 pbe
->next
= (struct pbe
*)get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
691 if (!pbe
) { /* get_zeroed_page() failed */
692 free_pagedir(pblist
);
699 * free_image_pages - Free pages allocated for snapshot
702 static void free_image_pages(void)
706 for_each_pbe (p
, pagedir_save
) {
708 ClearPageNosave(virt_to_page(p
->address
));
709 free_page(p
->address
);
716 * alloc_image_pages - Allocate pages for the snapshot.
719 static int alloc_image_pages(void)
723 for_each_pbe (p
, pagedir_save
) {
724 p
->address
= get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
727 SetPageNosave(virt_to_page(p
->address
));
732 void swsusp_free(void)
734 BUG_ON(PageNosave(virt_to_page(pagedir_save
)));
735 BUG_ON(PageNosaveFree(virt_to_page(pagedir_save
)));
737 free_pagedir(pagedir_save
);
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
744 * Returns TRUE or FALSE after checking the number of available
748 static int enough_free_mem(void)
750 if (nr_free_pages() < (nr_copy_pages
+ PAGES_FOR_IO
)) {
751 pr_debug("swsusp: Not enough free pages: Have %d\n",
760 * enough_swap - Make sure we have enough swap to save the image.
762 * Returns TRUE or FALSE after checking the total amount of swap
765 * FIXME: si_swapinfo(&i) returns all swap devices information.
766 * We should only consider resume_device.
769 static int enough_swap(void)
774 if (i
.freeswap
< (nr_copy_pages
+ PAGES_FOR_IO
)) {
775 pr_debug("swsusp: Not enough swap. Need %ld\n",i
.freeswap
);
781 static int swsusp_alloc(void)
785 pagedir_nosave
= NULL
;
786 nr_copy_pages
= calc_nr(nr_copy_pages
);
788 pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
789 nr_copy_pages
, PAGES_FOR_IO
, nr_free_pages());
791 if (!enough_free_mem())
797 if (!(pagedir_save
= alloc_pagedir(nr_copy_pages
))) {
798 printk(KERN_ERR
"suspend: Allocating pagedir failed.\n");
801 create_pbe_list(pagedir_save
, nr_copy_pages
);
802 pagedir_nosave
= pagedir_save
;
803 if ((error
= alloc_image_pages())) {
804 printk(KERN_ERR
"suspend: Allocating image pages failed.\n");
809 nr_copy_pages_check
= nr_copy_pages
;
813 static int suspend_prepare_image(void)
817 pr_debug("swsusp: critical section: \n");
818 if (save_highmem()) {
819 printk(KERN_CRIT
"Suspend machine: Not enough free pages for highmem\n");
826 printk("swsusp: Need to copy %u pages\n", nr_copy_pages
);
828 error
= swsusp_alloc();
832 /* During allocating of suspend pagedir, new cold pages may appear.
839 * End of critical section. From now on, we can write to memory,
840 * but we should not touch disk. This specially means we must _not_
841 * touch swap space! Except we must write out our image of course.
844 printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages
);
849 /* It is important _NOT_ to umount filesystems at this point. We want
850 * them synced (in case something goes wrong) but we DO not want to mark
851 * filesystem clean: it is not. (And it does not matter, if we resume
852 * correctly, we'll mark system clean, anyway.)
854 int swsusp_write(void)
859 error
= write_suspend_image();
860 /* This will unlock ignored swap devices since writing is finished */
867 extern asmlinkage
int swsusp_arch_suspend(void);
868 extern asmlinkage
int swsusp_arch_resume(void);
871 asmlinkage
int swsusp_save(void)
873 return suspend_prepare_image();
876 int swsusp_suspend(void)
879 if ((error
= arch_prepare_suspend()))
882 /* At this point, device_suspend() has been called, but *not*
883 * device_power_down(). We *must* device_power_down() now.
884 * Otherwise, drivers for some devices (e.g. interrupt controllers)
885 * become desynchronized with the actual state of the hardware
886 * at resume time, and evil weirdness ensues.
888 if ((error
= device_power_down(PMSG_FREEZE
))) {
893 if ((error
= swsusp_swap_check())) {
894 printk(KERN_ERR
"swsusp: FATAL: cannot find swap device, try "
900 save_processor_state();
901 if ((error
= swsusp_arch_suspend()))
902 printk("Error %d suspending\n", error
);
903 /* Restore control flow magically appears here */
904 restore_processor_state();
905 BUG_ON (nr_copy_pages_check
!= nr_copy_pages
);
912 int swsusp_resume(void)
916 if (device_power_down(PMSG_FREEZE
))
917 printk(KERN_ERR
"Some devices failed to power down, very bad\n");
918 /* We'll ignore saved state, but this gets preempt count (etc) right */
919 save_processor_state();
920 error
= swsusp_arch_resume();
921 /* Code below is only ever reached in case of failure. Otherwise
922 * execution continues at place where swsusp_arch_suspend was called
925 restore_processor_state();
933 * On resume, for storing the PBE list and the image,
934 * we can only use memory pages that do not conflict with the pages
935 * which had been used before suspend.
937 * We don't know which pages are usable until we allocate them.
939 * Allocated but unusable (ie eaten) memory pages are linked together
940 * to create a list, so that we can free them easily
942 * We could have used a type other than (void *)
943 * for this purpose, but ...
945 static void **eaten_memory
= NULL
;
947 static inline void eat_page(void *page
)
956 static unsigned long get_usable_page(unsigned gfp_mask
)
960 m
= get_zeroed_page(gfp_mask
);
961 while (!PageNosaveFree(virt_to_page(m
))) {
963 m
= get_zeroed_page(gfp_mask
);
970 static void free_eaten_memory(void)
978 m
= (unsigned long)c
;
984 pr_debug("swsusp: %d unused pages freed\n", i
);
988 * check_pagedir - We ensure here that pages that the PBEs point to
989 * won't collide with pages where we're going to restore from the loaded
993 static int check_pagedir(struct pbe
*pblist
)
997 /* This is necessary, so that we can free allocated pages
1000 for_each_pbe (p
, pblist
)
1003 for_each_pbe (p
, pblist
) {
1004 p
->address
= get_usable_page(GFP_ATOMIC
);
1012 * swsusp_pagedir_relocate - It is possible, that some memory pages
1013 * occupied by the list of PBEs collide with pages where we're going to
1014 * restore from the loaded pages later. We relocate them here.
1017 static struct pbe
* swsusp_pagedir_relocate(struct pbe
*pblist
)
1020 unsigned long zone_pfn
;
1021 struct pbe
*pbpage
, *tail
, *p
;
1023 int rel
= 0, error
= 0;
1025 if (!pblist
) /* a sanity check */
1028 pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
1029 swsusp_info
.pagedir_pages
);
1031 /* Set page flags */
1033 for_each_zone (zone
) {
1034 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
1035 SetPageNosaveFree(pfn_to_page(zone_pfn
+
1036 zone
->zone_start_pfn
));
1039 /* Clear orig addresses */
1041 for_each_pbe (p
, pblist
)
1042 ClearPageNosaveFree(virt_to_page(p
->orig_address
));
1044 tail
= pblist
+ PB_PAGE_SKIP
;
1046 /* Relocate colliding pages */
1048 for_each_pb_page (pbpage
, pblist
) {
1049 if (!PageNosaveFree(virt_to_page((unsigned long)pbpage
))) {
1050 m
= (void *)get_usable_page(GFP_ATOMIC
| __GFP_COLD
);
1055 memcpy(m
, (void *)pbpage
, PAGE_SIZE
);
1056 if (pbpage
== pblist
)
1057 pblist
= (struct pbe
*)m
;
1059 tail
->next
= (struct pbe
*)m
;
1061 eat_page((void *)pbpage
);
1062 pbpage
= (struct pbe
*)m
;
1064 /* We have to link the PBEs again */
1066 for (p
= pbpage
; p
< pbpage
+ PB_PAGE_SKIP
; p
++)
1067 if (p
->next
) /* needed to save the end */
1072 tail
= pbpage
+ PB_PAGE_SKIP
;
1076 printk("\nswsusp: Out of memory\n\n");
1077 free_pagedir(pblist
);
1078 free_eaten_memory();
1082 printk("swsusp: Relocated %d pages\n", rel
);
1088 * Using bio to read from swap.
1089 * This code requires a bit more work than just using buffer heads
1090 * but, it is the recommended way for 2.5/2.6.
1091 * The following are to signal the beginning and end of I/O. Bios
1092 * finish asynchronously, while we want them to happen synchronously.
1093 * A simple atomic_t, and a wait loop take care of this problem.
1096 static atomic_t io_done
= ATOMIC_INIT(0);
1098 static int end_io(struct bio
* bio
, unsigned int num
, int err
)
1100 if (!test_bit(BIO_UPTODATE
, &bio
->bi_flags
))
1101 panic("I/O error reading memory image");
1102 atomic_set(&io_done
, 0);
1106 static struct block_device
* resume_bdev
;
1109 * submit - submit BIO request.
1110 * @rw: READ or WRITE.
 *	@page_off:	physical offset of page.
1112 * @page: page we're reading or writing.
1114 * Straight from the textbook - allocate and initialize the bio.
1115 * If we're writing, make sure the page is marked as dirty.
1116 * Then submit it and wait.
1119 static int submit(int rw
, pgoff_t page_off
, void * page
)
1124 bio
= bio_alloc(GFP_ATOMIC
, 1);
1127 bio
->bi_sector
= page_off
* (PAGE_SIZE
>> 9);
1129 bio
->bi_bdev
= resume_bdev
;
1130 bio
->bi_end_io
= end_io
;
1132 if (bio_add_page(bio
, virt_to_page(page
), PAGE_SIZE
, 0) < PAGE_SIZE
) {
1133 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off
);
1139 bio_set_pages_dirty(bio
);
1141 atomic_set(&io_done
, 1);
1142 submit_bio(rw
| (1 << BIO_RW_SYNC
), bio
);
1143 while (atomic_read(&io_done
))
1151 static int bio_read_page(pgoff_t page_off
, void * page
)
1153 return submit(READ
, page_off
, page
);
1156 static int bio_write_page(pgoff_t page_off
, void * page
)
1158 return submit(WRITE
, page_off
, page
);
1162 * Sanity check if this image makes sense with this kernel/swap context
1163 * I really don't think that it's foolproof but more than nothing..
1166 static const char * sanity_check(void)
1169 if (swsusp_info
.version_code
!= LINUX_VERSION_CODE
)
1170 return "kernel version";
1171 if (swsusp_info
.num_physpages
!= num_physpages
)
1172 return "memory size";
1173 if (strcmp(swsusp_info
.uts
.sysname
,system_utsname
.sysname
))
1174 return "system type";
1175 if (strcmp(swsusp_info
.uts
.release
,system_utsname
.release
))
1176 return "kernel release";
1177 if (strcmp(swsusp_info
.uts
.version
,system_utsname
.version
))
1179 if (strcmp(swsusp_info
.uts
.machine
,system_utsname
.machine
))
1182 if(swsusp_info
.cpus
!= num_online_cpus())
1183 return "number of cpus";
1189 static int check_header(void)
1191 const char * reason
= NULL
;
1194 if ((error
= bio_read_page(swp_offset(swsusp_header
.swsusp_info
), &swsusp_info
)))
1197 /* Is this same machine? */
1198 if ((reason
= sanity_check())) {
1199 printk(KERN_ERR
"swsusp: Resume mismatch: %s\n",reason
);
1202 nr_copy_pages
= swsusp_info
.image_pages
;
1206 static int check_sig(void)
1210 memset(&swsusp_header
, 0, sizeof(swsusp_header
));
1211 if ((error
= bio_read_page(0, &swsusp_header
)))
1213 if (!memcmp(SWSUSP_SIG
, swsusp_header
.sig
, 10)) {
1214 memcpy(swsusp_header
.sig
, swsusp_header
.orig_sig
, 10);
1217 * Reset swap signature now.
1219 error
= bio_write_page(0, &swsusp_header
);
1221 printk(KERN_ERR
"swsusp: Suspend partition has wrong signature?\n");
1225 pr_debug("swsusp: Signature found, resuming\n");
1230 * data_read - Read image pages from swap.
1232 * You do not need to check for overlaps, check_pagedir()
1236 static int data_read(struct pbe
*pblist
)
1241 int mod
= swsusp_info
.image_pages
/ 100;
1246 printk("swsusp: Reading image data (%lu pages): ",
1247 swsusp_info
.image_pages
);
1249 for_each_pbe (p
, pblist
) {
1251 printk("\b\b\b\b%3d%%", i
/ mod
);
1253 error
= bio_read_page(swp_offset(p
->swap_address
),
1254 (void *)p
->address
);
1260 printk("\b\b\b\bdone\n");
1265 * read_pagedir - Read page backup list pages from swap
1268 static int read_pagedir(struct pbe
*pblist
)
1270 struct pbe
*pbpage
, *p
;
1277 printk("swsusp: Reading pagedir (%lu pages)\n",
1278 swsusp_info
.pagedir_pages
);
1280 for_each_pb_page (pbpage
, pblist
) {
1281 unsigned long offset
= swp_offset(swsusp_info
.pagedir
[i
++]);
1285 p
= (pbpage
+ PB_PAGE_SKIP
)->next
;
1286 error
= bio_read_page(offset
, (void *)pbpage
);
1287 (pbpage
+ PB_PAGE_SKIP
)->next
= p
;
1294 free_page((unsigned long)pblist
);
1296 BUG_ON(i
!= swsusp_info
.pagedir_pages
);
1302 static int check_suspend_image(void)
1306 if ((error
= check_sig()))
1309 if ((error
= check_header()))
1315 static int read_suspend_image(void)
1320 if (!(p
= alloc_pagedir(nr_copy_pages
)))
1323 if ((error
= read_pagedir(p
)))
1326 create_pbe_list(p
, nr_copy_pages
);
1328 if (!(pagedir_nosave
= swsusp_pagedir_relocate(p
)))
1331 /* Allocate memory for the image and read the data from swap */
1333 error
= check_pagedir(pagedir_nosave
);
1334 free_eaten_memory();
1336 error
= data_read(pagedir_nosave
);
1338 if (error
) { /* We fail cleanly */
1339 for_each_pbe (p
, pagedir_nosave
)
1341 free_page(p
->address
);
1344 free_pagedir(pagedir_nosave
);
1350 * swsusp_check - Check for saved image in swap
1353 int swsusp_check(void)
1357 resume_bdev
= open_by_devnum(swsusp_resume_device
, FMODE_READ
);
1358 if (!IS_ERR(resume_bdev
)) {
1359 set_blocksize(resume_bdev
, PAGE_SIZE
);
1360 error
= check_suspend_image();
1362 blkdev_put(resume_bdev
);
1364 error
= PTR_ERR(resume_bdev
);
1367 pr_debug("swsusp: resume file found\n");
1369 pr_debug("swsusp: Error %d check for resume file\n", error
);
1374 * swsusp_read - Read saved image from swap.
1377 int swsusp_read(void)
1381 if (IS_ERR(resume_bdev
)) {
1382 pr_debug("swsusp: block device not initialised\n");
1383 return PTR_ERR(resume_bdev
);
1386 error
= read_suspend_image();
1387 blkdev_put(resume_bdev
);
1390 pr_debug("swsusp: Reading resume file was successful\n");
1392 pr_debug("swsusp: Error %d resuming\n", error
);
1397 * swsusp_close - close swap device.
1400 void swsusp_close(void)
1402 if (IS_ERR(resume_bdev
)) {
1403 pr_debug("swsusp: block device not initialised\n");
1407 blkdev_put(resume_bdev
);