kernel/pid.c

   1 /*
   2  * Generic pidhash and scalable, time-bounded PID allocator
   3  *
   4  * (C) 2002 William Irwin, IBM
   5  * (C) 2002 Ingo Molnar, Red Hat
   6  *
   7  * pid-structures are backing objects for tasks sharing a given ID to chain
   8  * against. There is very little to them aside from hashing them and
   9  * parking tasks using given IDs on a list.
  10  *
  11  * The hash is always changed with the tasklist_lock write-acquired,
  12  * and the hash is only accessed with the tasklist_lock at least
  13  * read-acquired, so there's no additional SMP locking needed here.
  14  *
  15  * We have a list of bitmap pages, which bitmaps represent the PID space.
  16  * Allocating and freeing PIDs is completely lockless. The worst-case
  17  * allocation scenario when all but one out of 1 million PIDs possible are
  18  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  19  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
  20  */
  21
  22 #include <linux/mm.h>
  23 #include <linux/module.h>
  24 #include <linux/slab.h>
  25 #include <linux/init.h>
  26 #include <linux/bootmem.h>
  27 #include <linux/hash.h>
  28
  /*
   * Map a PID number to a bucket index of a pid_hash[] table.
   * The argument is parenthesized so that a compound expression is cast
   * to unsigned long as a whole rather than only its first operand.
   */
  #define pid_hashfn(nr) hash_long((unsigned long)(nr), pidhash_shift)
  30 static struct hlist_head *pid_hash[PIDTYPE_MAX];
  31 static int pidhash_shift;
  32
  33 int pid_max = PID_MAX_DEFAULT;
  34 int last_pid;
  35
  /* alloc_pidmap() restarts from this PID when last_pid + 1 reaches pid_max. */
  #define RESERVED_PIDS           300

  /* Number of PID bits held by one bitmap page, and the mask for the offset. */
  #define BITS_PER_PAGE           (PAGE_SIZE*8)
  #define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)

  /*
   * Number of bitmap pages needed to cover PID_MAX_LIMIT PIDs.  The division
   * rounds up, so a PID_MAX_LIMIT that is smaller than (or not a multiple of)
   * BITS_PER_PAGE still gets a map entry instead of truncating to zero.
   */
  #define PIDMAP_ENTRIES          ((PID_MAX_LIMIT + BITS_PER_PAGE - 1)/BITS_PER_PAGE)
  41
  42 /*
  43  * PID-map pages start out as NULL, they get allocated upon
  44  * first use and are never deallocated. This way a low pid_max
  45  * value does not cause lots of bitmaps to be allocated, but
  46  * the scheme scales to up to 4 million PIDs, runtime.
  47  */
  48 typedef struct pidmap {
  49         atomic_t nr_free;
  50         void *page;
  51 } pidmap_t;
  52
  53 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
  54          { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
  55
  56 static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
  57
  58 static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
  59
  60 fastcall void free_pidmap(int pid)
  61 {
  62         pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
  63         int offset = pid & BITS_PER_PAGE_MASK;
  64
  65         clear_bit(offset, map->page);
  66         atomic_inc(&map->nr_free);
  67 }
  68
  69 /*
  70  * Here we search for the next map that has free bits left.
  71  * Normally the next map has free PIDs.
  72  */
  73 static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
  74 {
  75         while (--*max_steps) {
  76                 if (++map == map_limit)
  77                         map = pidmap_array;
  78                 if (unlikely(!map->page)) {
  79                         unsigned long page = get_zeroed_page(GFP_KERNEL);
  80                         /*
  81                          * Free the page if someone raced with us
  82                          * installing it:
  83                          */
  84                         spin_lock(&pidmap_lock);
  85                         if (map->page)
  86                                 free_page(page);
  87                         else
  88                                 map->page = (void *)page;
  89                         spin_unlock(&pidmap_lock);
  90
  91                         if (!map->page)
  92                                 break;
  93                 }
  94                 if (atomic_read(&map->nr_free))
  95                         return map;
  96         }
  97         return NULL;
  98 }
  99
 100 int alloc_pidmap(void)
 101 {
 102         int pid, offset, max_steps = PIDMAP_ENTRIES + 1;
 103         pidmap_t *map;
 104
 105         pid = last_pid + 1;
 106         if (pid >= pid_max)
 107                 pid = RESERVED_PIDS;
 108
 109         offset = pid & BITS_PER_PAGE_MASK;
 110         map = pidmap_array + pid / BITS_PER_PAGE;
 111
 112         if (likely(map->page && !test_and_set_bit(offset, map->page))) {
 113                 /*
 114                  * There is a small window for last_pid updates to race,
 115                  * but in that case the next allocation will go into the
 116                  * slowpath and that fixes things up.
 117                  */
 118 return_pid:
 119                 atomic_dec(&map->nr_free);
 120                 last_pid = pid;
 121                 return pid;
 122         }
 123
 124         if (!offset || !atomic_read(&map->nr_free)) {
 125 next_map:
 126                 map = next_free_map(map, &max_steps);
 127                 if (!map)
 128                         goto failure;
 129                 offset = 0;
 130         }
 131         /*
 132          * Find the next zero bit:
 133          */
 134 scan_more:
 135         offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
 136         if (offset >= BITS_PER_PAGE)
 137                 goto next_map;
 138         if (test_and_set_bit(offset, map->page))
 139                 goto scan_more;
 140
 141         /* we got the PID: */
 142         pid = (map - pidmap_array) * BITS_PER_PAGE + offset;
 143         goto return_pid;
 144
 145 failure:
 146         return -1;
 147 }
 148
 149 struct pid * fastcall find_pid(enum pid_type type, int nr)
 150 {
 151         struct hlist_node *elem;
 152         struct pid *pid;
 153
 154         hlist_for_each_entry(pid, elem,
 155                         &pid_hash[type][pid_hashfn(nr)], pid_chain) {
 156                 if (pid->nr == nr)
 157                         return pid;
 158         }
 159         return NULL;
 160 }
 161
 162 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 163 {
 164         struct pid *pid, *task_pid;
 165
 166         task_pid = &task->pids[type];
 167         pid = find_pid(type, nr);
 168         if (pid == NULL) {
 169                 hlist_add_head(&task_pid->pid_chain,
 170                                 &pid_hash[type][pid_hashfn(nr)]);
 171                 INIT_LIST_HEAD(&task_pid->pid_list);
 172         } else {
 173                 INIT_HLIST_NODE(&task_pid->pid_chain);
 174                 list_add_tail(&task_pid->pid_list, &pid->pid_list);
 175         }
 176         task_pid->nr = nr;
 177
 178         return 0;
 179 }
 180
 181 static inline int __detach_pid(task_t *task, enum pid_type type)
 182 {
 183         struct pid *pid, *pid_next;
 184         int nr;
 185
 186         pid = &task->pids[type];
 187         if (!hlist_unhashed(&pid->pid_chain)) {
 188                 hlist_del(&pid->pid_chain);
 189                 if (!list_empty(&pid->pid_list)) {
 190                         pid_next = list_entry(pid->pid_list.next,
 191                                                 struct pid, pid_list);
 192                         /* insert next pid from pid_list to hash */
 193                         hlist_add_head(&pid_next->pid_chain,
 194                                 &pid_hash[type][pid_hashfn(pid_next->nr)]);
 195                 }
 196         }
 197         list_del(&pid->pid_list);
 198         nr = pid->nr;
 199         pid->nr = 0;
 200
 201         return nr;
 202 }
 203
 204 void fastcall detach_pid(task_t *task, enum pid_type type)
 205 {
 206         int nr;
 207
 208         nr = __detach_pid(task, type);
 209         if (!nr)
 210                 return;
 211
 212         for (type = 0; type < PIDTYPE_MAX; ++type)
 213                 if (find_pid(type, nr))
 214                         return;
 215         free_pidmap(nr);
 216 }
 217
 218 task_t *find_task_by_pid_type(int type, int nr)
 219 {
 220         struct pid *pid;
 221
 222         pid = find_pid(type, nr);
 223         if (!pid)
 224                 return NULL;
 225
 226         return pid_task(&pid->pid_list, type);
 227 }
 228
 229 EXPORT_SYMBOL(find_task_by_pid_type);
 230
 231 /*
 232  * This function switches the PIDs if a non-leader thread calls
 233  * sys_execve() - this must be done without releasing the PID.
 234  * (which a detach_pid() would eventually do.)
 235  */
 236 void switch_exec_pids(task_t *leader, task_t *thread)
 237 {
 238         __detach_pid(leader, PIDTYPE_PID);
 239         __detach_pid(leader, PIDTYPE_TGID);
 240         __detach_pid(leader, PIDTYPE_PGID);
 241         __detach_pid(leader, PIDTYPE_SID);
 242
 243         __detach_pid(thread, PIDTYPE_PID);
 244         __detach_pid(thread, PIDTYPE_TGID);
 245
 246         leader->pid = leader->tgid = thread->pid;
 247         thread->pid = thread->tgid;
 248
 249         attach_pid(thread, PIDTYPE_PID, thread->pid);
 250         attach_pid(thread, PIDTYPE_TGID, thread->tgid);
 251         attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
 252         attach_pid(thread, PIDTYPE_SID, thread->signal->session);
 253         list_add_tail(&thread->tasks, &init_task.tasks);
 254
 255         attach_pid(leader, PIDTYPE_PID, leader->pid);
 256         attach_pid(leader, PIDTYPE_TGID, leader->tgid);
 257         attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
 258         attach_pid(leader, PIDTYPE_SID, leader->signal->session);
 259 }
 260
 261 /*
 262  * The pid hash table is scaled according to the amount of memory in the
 263  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
 264  * more.
 265  */
 266 void __init pidhash_init(void)
 267 {
 268         int i, j, pidhash_size;
 269         unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 270
 271         pidhash_shift = max(4, fls(megabytes * 4));
 272         pidhash_shift = min(12, pidhash_shift);
 273         pidhash_size = 1 << pidhash_shift;
 274
 275         printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 276                 pidhash_size, pidhash_shift,
 277                 PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
 278
 279         for (i = 0; i < PIDTYPE_MAX; i++) {
 280                 pid_hash[i] = alloc_bootmem(pidhash_size *
 281                                         sizeof(*(pid_hash[i])));
 282                 if (!pid_hash[i])
 283                         panic("Could not alloc pidhash!\n");
 284                 for (j = 0; j < pidhash_size; j++)
 285                         INIT_HLIST_HEAD(&pid_hash[i][j]);
 286         }
 287 }
 288
 289 void __init pidmap_init(void)
 290 {
 291         int i;
 292
 293         pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
 294         set_bit(0, pidmap_array->page);
 295         atomic_dec(&pidmap_array->nr_free);
 296
 297         /*
 298          * Allocate PID 0, and hash it via all PID types:
 299          */
 300
 301         for (i = 0; i < PIDTYPE_MAX; i++)
 302                 attach_pid(current, i, 0);
 303 }