/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 *
 * Largely rewritten to get rid of all global locks
 *
 * Copyright (C) 2006 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/blktrace_api.h>
#include <linux/hardirq.h>

#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#ifdef CONFIG_HIGHMEM

static int __set_page_address(struct page *page, void *virtual, int pos);

unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);
unsigned int nr_free_highpages (void)
{
	pg_data_t *pgdat;
	unsigned int pages = 0;

	for_each_online_pgdat(pgdat) {
		pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
			NR_FREE_PAGES);
		if (zone_movable_is_highmem())
			pages += zone_page_state(
					&pgdat->node_zones[ZONE_MOVABLE],
					NR_FREE_PAGES);
	}

	return pages;
}
/*
 * count is not a pure "count".
 *	0 means it's owned exclusively by someone
 *	1 means it's free for use - either mapped or not.
 *	n means that there are (n-1) current users of it.
 */
static atomic_t pkmap_count[LAST_PKMAP];
static atomic_t pkmap_hand;
static atomic_t pkmap_free;
static atomic_t pkmap_users;

pte_t * pkmap_page_table;

static DECLARE_WAIT_QUEUE_HEAD(pkmap_wait);
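#if 0
/*
 * Illustrative sketch, kept under #if 0 and never built: the life cycle
 * of a single pkmap_count entry under the convention documented above.
 * The slot index and the call sequence are assumptions made purely for
 * illustration; they do not correspond to any real caller.
 */
static void pkmap_count_lifecycle_sketch(int slot)
{
	atomic_set(&pkmap_count[slot], 1);	/* free, possibly still mapped */
	atomic_set(&pkmap_count[slot], 0);	/* claimed exclusively */
	atomic_set(&pkmap_count[slot], 2);	/* mapped, one user (n - 1 == 1) */
	atomic_inc(&pkmap_count[slot]);		/* a second user kmaps it */
	atomic_dec(&pkmap_count[slot]);		/* one kunmap, one user left */
	atomic_dec(&pkmap_count[slot]);		/* last kunmap, free again (1) */
}
#endif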
/*
 * Try to free a given kmap slot.
 *
 * Returns:
 *  -1 - in use
 *   0 - free, no TLB flush needed
 *   1 - free, needs TLB flush
 */
static int pkmap_try_free(int pos)
{
	if (atomic_cmpxchg(&pkmap_count[pos], 1, 0) != 1)
		return -1;

	atomic_dec(&pkmap_free);
	/*
	 * TODO: add a young bit to make it CLOCK
	 */
	if (!pte_none(pkmap_page_table[pos])) {
		struct page *page = pte_page(pkmap_page_table[pos]);
		unsigned long addr = PKMAP_ADDR(pos);
		pte_t *ptep = &pkmap_page_table[pos];

		VM_BUG_ON(addr != (unsigned long)page_address(page));

		if (!__set_page_address(page, NULL, pos))
			BUG();
		flush_kernel_dcache_page(page);
		pte_clear(&init_mm, addr, ptep);

		return 1;
	}

	return 0;
}
static inline void pkmap_put(atomic_t *counter)
{
	switch (atomic_dec_return(counter)) {
	case 0:
		BUG();

	case 1:
		atomic_inc(&pkmap_free);
		wake_up(&pkmap_wait);
	}
}
#define TLB_BATCH	32

static int pkmap_get_free(void)
{
	int i, pos, flush;

restart:
	for (i = 0; i < LAST_PKMAP; i++) {
		pos = atomic_inc_return(&pkmap_hand) & LAST_PKMAP_MASK;
		flush = pkmap_try_free(pos);
		if (flush >= 0)
			goto got_one;
	}

	/*
	 * wait for somebody else to unmap their entries
	 */
	if (likely(!in_interrupt()))
		wait_event(pkmap_wait, atomic_read(&pkmap_free) != 0);

	goto restart;

got_one:
	if (flush) {
#if 0
		flush_tlb_kernel_range(PKMAP_ADDR(pos), PKMAP_ADDR(pos+1));
#else
		int pos2 = (pos + 1) & LAST_PKMAP_MASK;
		int nr;
		int entries[TLB_BATCH];

		/*
		 * For those architectures that cannot help but flush the
		 * whole TLB, flush some more entries to make it worthwhile.
		 * Scan ahead of the hand to minimise search distances.
		 */
		for (i = 0, nr = 0; i < LAST_PKMAP && nr < TLB_BATCH;
				i++, pos2 = (pos2 + 1) & LAST_PKMAP_MASK) {

			flush = pkmap_try_free(pos2);
			if (flush < 0)
				continue;

			if (!flush) {
				atomic_t *counter = &pkmap_count[pos2];
				VM_BUG_ON(atomic_read(counter) != 0);
				atomic_set(counter, 2);
				pkmap_put(counter);
			} else
				entries[nr++] = pos2;
		}
		flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));

		for (i = 0; i < nr; i++) {
			atomic_t *counter = &pkmap_count[entries[i]];
			VM_BUG_ON(atomic_read(counter) != 0);
			atomic_set(counter, 2);
			pkmap_put(counter);
		}
#endif
	}
	return pos;
}
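#if 0
/*
 * Worked example, never built: how the clock hand wraps.  Assuming
 * LAST_PKMAP is 1024 (so LAST_PKMAP_MASK is 1023), the expression
 * atomic_inc_return(&pkmap_hand) & LAST_PKMAP_MASK visits slots
 * 0, 1, ..., 1023 and then wraps back to 0, so every slot is
 * eventually revisited.
 */
static int pkmap_hand_wrap_example(void)
{
	int pos = 1023 & 1023;		/* last slot in the window */
	int next = (pos + 1) & 1023;	/* wraps back to slot 0 */

	return next;
}
#endif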
static unsigned long pkmap_insert(struct page *page)
{
	int pos = pkmap_get_free();
	unsigned long vaddr = PKMAP_ADDR(pos);
	pte_t *ptep = &pkmap_page_table[pos];
	pte_t entry = mk_pte(page, kmap_prot);
	atomic_t *counter = &pkmap_count[pos];

	VM_BUG_ON(atomic_read(counter) != 0);

	set_pte_at(&init_mm, vaddr, ptep, entry);
	if (unlikely(!__set_page_address(page, (void *)vaddr, pos))) {
		/*
		 * concurrent pkmap_inserts for this page -
		 * the other won the race, release this entry.
		 *
		 * we can still clear the pte without a tlb flush since
		 * it couldn't have been used yet.
		 */
		pte_clear(&init_mm, vaddr, ptep);
		VM_BUG_ON(atomic_read(counter) != 0);
		atomic_set(counter, 2);
		pkmap_put(counter);
		vaddr = 0;
	} else
		atomic_set(counter, 2);

	return vaddr;
}
/*
 * Flush all unused kmap mappings in order to remove stray mappings.
 */
void kmap_flush_unused(void)
{
	WARN_ON_ONCE(1);
}
/*
 * Avoid starvation deadlock by limiting the number of tasks that can obtain a
 * kmap to (LAST_PKMAP - KM_TYPE_NR*NR_CPUS)/2.
 */
static void kmap_account(void)
{
	int weight;

#ifndef CONFIG_PREEMPT_RT
	if (in_interrupt()) {
		/* irqs can always get them */
		weight = -1;
	} else
#endif
	if (current->flags & PF_KMAP) {
		current->flags &= ~PF_KMAP;
		/* we already accounted the second */
		weight = 0;
	} else {
		/* mark 1, account 2 */
		current->flags |= PF_KMAP;
		weight = 2;
	}

	if (weight > 0) {
		/*
		 * reserve KM_TYPE_NR maps per CPU for interrupt context
		 */
		const int target = LAST_PKMAP
#ifndef CONFIG_PREEMPT_RT
				- KM_TYPE_NR*NR_CPUS
#endif
			;

again:
		wait_event(pkmap_wait,
			atomic_read(&pkmap_users) + weight <= target);

		if (atomic_add_return(weight, &pkmap_users) > target) {
			atomic_sub(weight, &pkmap_users);
			goto again;
		}
	}
}
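#if 0
/*
 * Worked example of the limit above, never built.  The numbers are
 * assumptions, not taken from any particular config: with LAST_PKMAP set
 * to 1024, KM_TYPE_NR to 20 and NR_CPUS to 2, the non-RT target becomes
 * 1024 - 20*2 = 984.  Every sleeping task accounts a weight of 2, so at
 * most 984/2 = 492 tasks hold kmaps at once, matching the
 * (LAST_PKMAP - KM_TYPE_NR*NR_CPUS)/2 bound quoted above kmap_account().
 */
static int kmap_account_target_example(void)
{
	const int last_pkmap = 1024;	/* assumed size of the pkmap window */
	const int km_type_nr = 20;	/* assumed atomic kmap slots per CPU */
	const int nr_cpus = 2;		/* assumed number of CPUs */

	return (last_pkmap - km_type_nr * nr_cpus) / 2;	/* 492 tasks */
}
#endif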
static void kunmap_account(void)
{
	int weight;

#ifndef CONFIG_PREEMPT_RT
	if (in_interrupt()) {
		weight = -1;
	} else
#endif
	if (current->flags & PF_KMAP) {
		/* there was only 1 kmap, un-account both */
		current->flags &= ~PF_KMAP;
		weight = 2;
	} else {
		/* there were two kmaps, un-account per kunmap */
		weight = 1;
	}

	if (weight > 0)
		atomic_sub(weight, &pkmap_users);
	wake_up(&pkmap_wait);
}
void *kmap_high(struct page *page)
{
	unsigned long vaddr;

	kmap_account();
again:
	vaddr = (unsigned long)page_address(page);
	if (vaddr) {
		atomic_t *counter = &pkmap_count[PKMAP_NR(vaddr)];
		if (atomic_inc_not_zero(counter)) {
			/*
			 * atomic_inc_not_zero implies a (memory) barrier on success
			 * so page address will be reloaded.
			 */
			unsigned long vaddr2 = (unsigned long)page_address(page);
			if (likely(vaddr == vaddr2))
				return (void *)vaddr;

			/*
			 * Oops, we got someone else.
			 *
			 * This can happen if we get preempted after
			 * page_address() and before atomic_inc_not_zero()
			 * and during that preemption this slot is freed and
			 * reused.
			 */
			pkmap_put(counter);
			goto again;
		}
	}

	vaddr = pkmap_insert(page);
	if (!vaddr)
		goto again;

	return (void *)vaddr;
}

EXPORT_SYMBOL(kmap_high);
void kunmap_high(struct page *page)
{
	unsigned long vaddr = (unsigned long)page_address(page);

	BUG_ON(!vaddr);
	pkmap_put(&pkmap_count[PKMAP_NR(vaddr)]);
	kunmap_account();
}

EXPORT_SYMBOL(kunmap_high);

#endif /* CONFIG_HIGHMEM */
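#if 0
/*
 * Illustrative sketch, never built: how a typical sleeping-context caller
 * reaches kmap_high()/kunmap_high() through the ordinary kmap()/kunmap()
 * wrappers.  The helper name and the memset() are hypothetical; only the
 * kmap()/kunmap() pairing is implied by the code above.
 */
static void kmap_high_usage_sketch(struct page *page)
{
	void *vaddr = kmap(page);	/* may sleep; uses kmap_high() for highmem */

	memset(vaddr, 0, PAGE_SIZE);	/* access the page via its kernel mapping */

	kunmap(page);			/* drops the pkmap_count reference */
}
#endif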
#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER	7

/*
 * Describes one page->virtual address association.
 */
static struct page_address_map {
	struct page *page;
	void *virtual;
	struct list_head list;
} page_address_maps[LAST_PKMAP];

/*
 * Hash table bucket
 */
static struct page_address_slot {
	struct list_head lh;		/* List of page_address_maps */
	spinlock_t lock;		/* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
static struct page_address_slot *page_slot(struct page *page)
{
	return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}
/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */

static void *__page_address(struct page_address_slot *pas, struct page *page)
{
	void *ret = NULL;

	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				break;
			}
		}
	}

	return ret;
}
void *page_address(struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	spin_lock_irqsave(&pas->lock, flags);
	ret = __page_address(pas, page);
	spin_unlock_irqrestore(&pas->lock, flags);

	return ret;
}

EXPORT_SYMBOL(page_address);
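#if 0
/*
 * Illustrative sketch, never built: what the lookup above means for
 * callers.  A lowmem page always has a linear-map address, so
 * page_address() never returns NULL for it; a highmem page only has an
 * address while a mapping (such as a kmap) is installed, and NULL is
 * returned otherwise.  The helper name is hypothetical.
 */
static void page_address_sketch(struct page *lowmem_page,
				struct page *highmem_page)
{
	void *low = page_address(lowmem_page);		/* never NULL */
	void *high = page_address(highmem_page);	/* NULL if not mapped */

	(void)low;
	(void)high;
}
#endif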
/**
 * set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use
 */
static int __set_page_address(struct page *page, void *virtual, int pos)
{
	int ret = 0;
	unsigned long flags;
	struct page_address_slot *pas;
	struct page_address_map *pam;

	VM_BUG_ON(!PageHighMem(page));
	VM_BUG_ON(atomic_read(&pkmap_count[pos]) != 0);
	VM_BUG_ON(pos < 0 || pos >= LAST_PKMAP);

	pas = page_slot(page);
	pam = &page_address_maps[pos];

	spin_lock_irqsave(&pas->lock, flags);
	if (virtual) { /* add */
		VM_BUG_ON(!list_empty(&pam->list));

		if (!__page_address(pas, page)) {
			pam->page = page;
			pam->virtual = virtual;
			list_add_tail(&pam->list, &pas->lh);
			ret = 1;
		}
	} else { /* remove */
		if (!list_empty(&pam->list)) {
			list_del_init(&pam->list);
			ret = 1;
		}
	}
	spin_unlock_irqrestore(&pas->lock, flags);

	return ret;
}
int set_page_address(struct page *page, void *virtual)
{
	/*
	 * set_page_address is not supposed to be called when using
	 * hashed virtual addresses.
	 */
	BUG();
	return 0;
}
void __init __page_address_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
		INIT_LIST_HEAD(&page_address_maps[i].list);

	for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
		INIT_LIST_HEAD(&page_address_htable[i].lh);
		spin_lock_init(&page_address_htable[i].lock);
	}
}
#elif defined(CONFIG_HIGHMEM) /* HASHED_PAGE_VIRTUAL */

static int __set_page_address(struct page *page, void *virtual, int pos)
{
	return set_page_address(page, virtual);
}

#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
#if defined(CONFIG_HIGHMEM) || defined(HASHED_PAGE_VIRTUAL)

void __init page_address_init(void)
{
#ifdef CONFIG_HIGHMEM
	int i;

	for (i = 0; i < ARRAY_SIZE(pkmap_count); i++)
		atomic_set(&pkmap_count[i], 1);
	atomic_set(&pkmap_hand, 0);
	atomic_set(&pkmap_free, LAST_PKMAP);
	atomic_set(&pkmap_users, 0);
#endif

#ifdef HASHED_PAGE_VIRTUAL
	__page_address_init();
#endif
}

#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */