repo init
[linux-rt-nao.git] / mm / highmem.c
blob81c34e1e22b27a493b439551659dc48a513110d0
1 /*
2 * High memory handling common code and variables.
4 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
5 * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
8 * Redesigned the x86 32-bit VM architecture to deal with
9 * 64-bit physical space. With current x86 CPUs this
10 * means up to 64 Gigabytes physical RAM.
12 * Rewrote high memory support to move the page cache into
13 * high memory. Implemented permanent (schedulable) kmaps
14 * based on Linus' idea.
16 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
18 * Largely rewritten to get rid of all global locks
20 * Copyright (C) 2006 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
24 #include <linux/mm.h>
25 #include <linux/module.h>
26 #include <linux/swap.h>
27 #include <linux/bio.h>
28 #include <linux/pagemap.h>
29 #include <linux/mempool.h>
30 #include <linux/blkdev.h>
31 #include <linux/init.h>
32 #include <linux/hash.h>
33 #include <linux/highmem.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hardirq.h>
37 #include <asm/tlbflush.h>
38 #include <asm/pgtable.h>
40 #ifdef CONFIG_HIGHMEM
42 static int __set_page_address(struct page *page, void *virtual, int pos);
44 unsigned long totalhigh_pages __read_mostly;
45 EXPORT_SYMBOL(totalhigh_pages);
47 unsigned int nr_free_highpages (void)
49 pg_data_t *pgdat;
50 unsigned int pages = 0;
52 for_each_online_pgdat(pgdat) {
53 pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
54 NR_FREE_PAGES);
55 if (zone_movable_is_highmem())
56 pages += zone_page_state(
57 &pgdat->node_zones[ZONE_MOVABLE],
58 NR_FREE_PAGES);
61 return pages;
65 * count is not a pure "count".
66 * 0 means its owned exclusively by someone
67 * 1 means its free for use - either mapped or not.
68 * n means that there are (n-1) current users of it.
70 static atomic_t pkmap_count[LAST_PKMAP];
71 static atomic_t pkmap_hand;
72 static atomic_t pkmap_free;
73 static atomic_t pkmap_users;
75 pte_t * pkmap_page_table;
77 static DECLARE_WAIT_QUEUE_HEAD(pkmap_wait);
80 * Try to free a given kmap slot.
82 * Returns:
83 * -1 - in use
84 * 0 - free, no TLB flush needed
85 * 1 - free, needs TLB flush
87 static int pkmap_try_free(int pos)
89 if (atomic_cmpxchg(&pkmap_count[pos], 1, 0) != 1)
90 return -1;
92 atomic_dec(&pkmap_free);
94 * TODO: add a young bit to make it CLOCK
96 if (!pte_none(pkmap_page_table[pos])) {
97 struct page *page = pte_page(pkmap_page_table[pos]);
98 unsigned long addr = PKMAP_ADDR(pos);
99 pte_t *ptep = &pkmap_page_table[pos];
101 VM_BUG_ON(addr != (unsigned long)page_address(page));
103 if (!__set_page_address(page, NULL, pos))
104 BUG();
105 flush_kernel_dcache_page(page);
106 pte_clear(&init_mm, addr, ptep);
108 return 1;
111 return 0;
114 static inline void pkmap_put(atomic_t *counter)
116 switch (atomic_dec_return(counter)) {
117 case 0:
118 BUG();
120 case 1:
121 atomic_inc(&pkmap_free);
122 wake_up(&pkmap_wait);
126 #define TLB_BATCH 32
128 static int pkmap_get_free(void)
130 int i, pos, flush;
132 restart:
133 for (i = 0; i < LAST_PKMAP; i++) {
134 pos = atomic_inc_return(&pkmap_hand) & LAST_PKMAP_MASK;
135 flush = pkmap_try_free(pos);
136 if (flush >= 0)
137 goto got_one;
141 * wait for somebody else to unmap their entries
143 if (likely(!in_interrupt()))
144 wait_event(pkmap_wait, atomic_read(&pkmap_free) != 0);
146 goto restart;
148 got_one:
149 if (flush) {
150 #if 0
151 flush_tlb_kernel_range(PKMAP_ADDR(pos), PKMAP_ADDR(pos+1));
152 #else
153 int pos2 = (pos + 1) & LAST_PKMAP_MASK;
154 int nr;
155 int entries[TLB_BATCH];
158 * For those architectures that cannot help but flush the
159 * whole TLB, flush some more entries to make it worthwhile.
160 * Scan ahead of the hand to minimise search distances.
162 for (i = 0, nr = 0; i < LAST_PKMAP && nr < TLB_BATCH;
163 i++, pos2 = (pos2 + 1) & LAST_PKMAP_MASK) {
165 flush = pkmap_try_free(pos2);
166 if (flush < 0)
167 continue;
169 if (!flush) {
170 atomic_t *counter = &pkmap_count[pos2];
171 VM_BUG_ON(atomic_read(counter) != 0);
172 atomic_set(counter, 2);
173 pkmap_put(counter);
174 } else
175 entries[nr++] = pos2;
177 flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
179 for (i = 0; i < nr; i++) {
180 atomic_t *counter = &pkmap_count[entries[i]];
181 VM_BUG_ON(atomic_read(counter) != 0);
182 atomic_set(counter, 2);
183 pkmap_put(counter);
185 #endif
187 return pos;
190 static unsigned long pkmap_insert(struct page *page)
192 int pos = pkmap_get_free();
193 unsigned long vaddr = PKMAP_ADDR(pos);
194 pte_t *ptep = &pkmap_page_table[pos];
195 pte_t entry = mk_pte(page, kmap_prot);
196 atomic_t *counter = &pkmap_count[pos];
198 VM_BUG_ON(atomic_read(counter) != 0);
200 set_pte_at(&init_mm, vaddr, ptep, entry);
201 if (unlikely(!__set_page_address(page, (void *)vaddr, pos))) {
203 * concurrent pkmap_inserts for this page -
204 * the other won the race, release this entry.
206 * we can still clear the pte without a tlb flush since
207 * it couldn't have been used yet.
209 pte_clear(&init_mm, vaddr, ptep);
210 VM_BUG_ON(atomic_read(counter) != 0);
211 atomic_set(counter, 2);
212 pkmap_put(counter);
213 vaddr = 0;
214 } else
215 atomic_set(counter, 2);
217 return vaddr;
/*
 * Nominally: flush all unused kmap mappings in order to remove stray
 * mappings.
 *
 * In this implementation no flushing is performed; any call merely
 * triggers a one-time warning.
 */
void kmap_flush_unused(void)
{
	WARN_ON_ONCE(1);
}
229 * Avoid starvation deadlock by limiting the number of tasks that can obtain a
230 * kmap to (LAST_PKMAP - KM_TYPE_NR*NR_CPUS)/2.
232 static void kmap_account(void)
234 int weight;
236 #ifndef CONFIG_PREEMPT_RT
237 if (in_interrupt()) {
238 /* irqs can always get them */
239 weight = -1;
240 } else
241 #endif
242 if (current->flags & PF_KMAP) {
243 current->flags &= ~PF_KMAP;
244 /* we already accounted the second */
245 weight = 0;
246 } else {
247 /* mark 1, account 2 */
248 current->flags |= PF_KMAP;
249 weight = 2;
252 if (weight > 0) {
254 * reserve KM_TYPE_NR maps per CPU for interrupt context
256 const int target = LAST_PKMAP
257 #ifndef CONFIG_PREEMPT_RT
258 - KM_TYPE_NR*NR_CPUS
259 #endif
262 again:
263 wait_event(pkmap_wait,
264 atomic_read(&pkmap_users) + weight <= target);
266 if (atomic_add_return(weight, &pkmap_users) > target) {
267 atomic_sub(weight, &pkmap_users);
268 goto again;
273 static void kunmap_account(void)
275 int weight;
277 #ifndef CONFIG_PREEMPT_RT
278 if (in_irq()) {
279 weight = -1;
280 } else
281 #endif
282 if (current->flags & PF_KMAP) {
283 /* there was only 1 kmap, un-account both */
284 current->flags &= ~PF_KMAP;
285 weight = 2;
286 } else {
287 /* there were two kmaps, un-account per kunmap */
288 weight = 1;
291 if (weight > 0)
292 atomic_sub(weight, &pkmap_users);
293 wake_up(&pkmap_wait);
296 void *kmap_high(struct page *page)
298 unsigned long vaddr;
300 kmap_account();
301 again:
302 vaddr = (unsigned long)page_address(page);
303 if (vaddr) {
304 atomic_t *counter = &pkmap_count[PKMAP_NR(vaddr)];
305 if (atomic_inc_not_zero(counter)) {
307 * atomic_inc_not_zero implies a (memory) barrier on success
308 * so page address will be reloaded.
310 unsigned long vaddr2 = (unsigned long)page_address(page);
311 if (likely(vaddr == vaddr2))
312 return (void *)vaddr;
315 * Oops, we got someone else.
317 * This can happen if we get preempted after
318 * page_address() and before atomic_inc_not_zero()
319 * and during that preemption this slot is freed and
320 * reused.
322 pkmap_put(counter);
323 goto again;
327 vaddr = pkmap_insert(page);
328 if (!vaddr)
329 goto again;
331 return (void *)vaddr;
334 EXPORT_SYMBOL(kmap_high);
336 void kunmap_high(struct page *page)
338 unsigned long vaddr = (unsigned long)page_address(page);
339 BUG_ON(!vaddr);
340 pkmap_put(&pkmap_count[PKMAP_NR(vaddr)]);
341 kunmap_account();
344 EXPORT_SYMBOL(kunmap_high);
345 #endif
347 #if defined(HASHED_PAGE_VIRTUAL)
349 #define PA_HASH_ORDER 7
352 * Describes one page->virtual address association.
354 static struct page_address_map {
355 struct page *page;
356 void *virtual;
357 struct list_head list;
358 } page_address_maps[LAST_PKMAP];
361 * Hash table bucket
363 static struct page_address_slot {
364 struct list_head lh; /* List of page_address_maps */
365 spinlock_t lock; /* Protect this bucket's list */
366 } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
368 static struct page_address_slot *page_slot(struct page *page)
370 return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
374 * page_address - get the mapped virtual address of a page
375 * @page: &struct page to get the virtual address of
377 * Returns the page's virtual address.
380 static void *__page_address(struct page_address_slot *pas, struct page *page)
382 void *ret = NULL;
384 if (!list_empty(&pas->lh)) {
385 struct page_address_map *pam;
387 list_for_each_entry(pam, &pas->lh, list) {
388 if (pam->page == page) {
389 ret = pam->virtual;
390 break;
395 return ret;
398 void *page_address(struct page *page)
400 unsigned long flags;
401 void *ret;
402 struct page_address_slot *pas;
404 if (!PageHighMem(page))
405 return lowmem_page_address(page);
407 pas = page_slot(page);
408 spin_lock_irqsave(&pas->lock, flags);
409 ret = __page_address(pas, page);
410 spin_unlock_irqrestore(&pas->lock, flags);
411 return ret;
414 EXPORT_SYMBOL(page_address);
417 * set_page_address - set a page's virtual address
418 * @page: &struct page to set
419 * @virtual: virtual address to use
421 static int __set_page_address(struct page *page, void *virtual, int pos)
423 int ret = 0;
424 unsigned long flags;
425 struct page_address_slot *pas;
426 struct page_address_map *pam;
428 VM_BUG_ON(!PageHighMem(page));
429 VM_BUG_ON(atomic_read(&pkmap_count[pos]) != 0);
430 VM_BUG_ON(pos < 0 || pos >= LAST_PKMAP);
432 pas = page_slot(page);
433 pam = &page_address_maps[pos];
435 spin_lock_irqsave(&pas->lock, flags);
436 if (virtual) { /* add */
437 VM_BUG_ON(!list_empty(&pam->list));
439 if (!__page_address(pas, page)) {
440 pam->page = page;
441 pam->virtual = virtual;
442 list_add_tail(&pam->list, &pas->lh);
443 ret = 1;
445 } else { /* remove */
446 if (!list_empty(&pam->list)) {
447 list_del_init(&pam->list);
448 ret = 1;
451 spin_unlock_irqrestore(&pas->lock, flags);
453 return ret;
/*
 * set_page_address is not supposed to be called when using
 * hashed virtual addresses; any call is treated as a bug.
 */
int set_page_address(struct page *page, void *virtual)
{
	BUG();

	/* Only here to satisfy the return type. */
	return 0;
}
466 void __init __page_address_init(void)
468 int i;
470 for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
471 INIT_LIST_HEAD(&page_address_maps[i].list);
473 for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
474 INIT_LIST_HEAD(&page_address_htable[i].lh);
475 spin_lock_init(&page_address_htable[i].lock);
479 #elif defined (CONFIG_HIGHMEM) /* HASHED_PAGE_VIRTUAL */
/*
 * Variant used when the page address is not kept in the hash table:
 * forwards to set_page_address().  @pos is unused here.
 */
static int __set_page_address(struct page *page, void *virtual, int pos)
{
	int ret = set_page_address(page, virtual);

	return ret;
}
486 #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
#if defined(CONFIG_HIGHMEM) || defined(HASHED_PAGE_VIRTUAL)

/*
 * Boot-time initialisation of the state in this file.
 *
 * With CONFIG_HIGHMEM every pkmap slot starts out free (count 1), so
 * pkmap_free starts at LAST_PKMAP; the hand and the user accounting start
 * at 0.  With HASHED_PAGE_VIRTUAL the page-address hash is set up too.
 */
void __init page_address_init(void)
{
#ifdef CONFIG_HIGHMEM
	atomic_t *slot;

	for (slot = pkmap_count;
	     slot < pkmap_count + ARRAY_SIZE(pkmap_count);
	     slot++)
		atomic_set(slot, 1);

	atomic_set(&pkmap_hand, 0);
	atomic_set(&pkmap_free, LAST_PKMAP);
	atomic_set(&pkmap_users, 0);
#endif

#ifdef HASHED_PAGE_VIRTUAL
	__page_address_init();
#endif
}

#endif /* defined(CONFIG_HIGHMEM) || defined(HASHED_PAGE_VIRTUAL) */