/*	$NetBSD: linux_futex.c,v 1.23 2009/02/23 20:28:58 rmind Exp $ */

/*-
 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Emmanuel Dreyfus
 * 4. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_futex.c,v 1.23 2009/02/23 20:28:58 rmind Exp $");

#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lwp.h>
#include <sys/queue.h>
#include <sys/condvar.h>
#include <sys/mutex.h>
#include <sys/once.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/atomic.h>

#include <compat/linux/common/linux_types.h>
#include <compat/linux/common/linux_emuldata.h>
#include <compat/linux/common/linux_exec.h>
#include <compat/linux/common/linux_signal.h>
#include <compat/linux/common/linux_futex.h>
#include <compat/linux/common/linux_ipc.h>
#include <compat/linux/common/linux_sched.h>
#include <compat/linux/common/linux_sem.h>
#include <compat/linux/linux_syscallargs.h>

void linux_to_native_timespec(struct timespec *, struct linux_timespec *);

struct futex;

struct waiting_proc {
	lwp_t *wp_l;
	struct futex *wp_new_futex;
	kcondvar_t wp_futex_cv;
	TAILQ_ENTRY(waiting_proc) wp_list;
};

struct futex {
	void *f_uaddr;
	int f_refcount;
	LIST_ENTRY(futex) f_list;
	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
};

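/*
 * All futexes live on a single global list and are reference counted:
 * futex_get() looks up the entry for a user-space address (creating it
 * if needed) and takes a reference, futex_put() drops the reference and
 * frees the entry once nothing points at it any more.  The list and the
 * per-futex queues of waiting LWPs are protected by futex_lock.
 */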
static LIST_HEAD(futex_list, futex) futex_list;
static kmutex_t futex_lock;

#define FUTEX_LOCK	mutex_enter(&futex_lock);
#define FUTEX_UNLOCK	mutex_exit(&futex_lock);

#define FUTEX_LOCKED	1
#define FUTEX_UNLOCKED	0

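/*
 * FUTEX_SYSTEM_LOCK is the big kernel lock.  Every operation in
 * linux_sys_futex() runs under it, presumably so that the check of the
 * user-space value in FUTEX_WAIT cannot race with a FUTEX_WAKE issued
 * from another process between the copyin() and the sleep.
 */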
#define FUTEX_SYSTEM_LOCK	KERNEL_LOCK(1, NULL);
#define FUTEX_SYSTEM_UNLOCK	KERNEL_UNLOCK_ONE(0);

#ifdef DEBUG_LINUX_FUTEX
#define FUTEXPRINTF(a) printf a
#else
#define FUTEXPRINTF(a)
#endif

static ONCE_DECL(futex_once);

static int
futex_init(void)
{
	FUTEXPRINTF(("futex_init: initializing futex\n"));
	mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE);
	return 0;
}

static struct futex *futex_get(void *, int);
static void futex_put(struct futex *);
static int futex_sleep(struct futex *, lwp_t *, unsigned long);
static int futex_wake(struct futex *, int, struct futex *, int);
static int futex_atomic_op(lwp_t *, int, void *);

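/*
 * linux_sys_futex(): entry point for the Linux futex(2) system call.
 * Only FUTEX_WAIT, FUTEX_WAKE, FUTEX_CMP_REQUEUE, FUTEX_REQUEUE and
 * FUTEX_WAKE_OP are implemented; FUTEX_FD and unknown operations fail
 * with ENOSYS.
 */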
int
linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, register_t *retval)
{
	/* {
		syscallarg(int *) uaddr;
		syscallarg(int) op;
		syscallarg(int) val;
		syscallarg(const struct linux_timespec *) timeout;
		syscallarg(int *) uaddr2;
		syscallarg(int) val3;
	} */
	int val;
	int ret;
	struct linux_timespec timeout = { 0, 0 };
	int error = 0;
	struct futex *f;
	struct futex *newf;
	int timeout_hz;
	struct timespec ts;
	struct futex *f2;
	int op_ret;

	RUN_ONCE(&futex_once, futex_init);

	/*
	 * Our implementation provides only private futexes.  Most
	 * applications should use private futexes but do not claim so.
	 * Therefore we treat all futexes as private by clearing the
	 * FUTEX_PRIVATE_FLAG.  This works in most cases (i.e. when futexes
	 * are not shared via a file descriptor or between different
	 * processes).
	 */
	switch (SCARG(uap, op) & ~LINUX_FUTEX_PRIVATE_FLAG) {
	case LINUX_FUTEX_WAIT:
		FUTEX_SYSTEM_LOCK;

		if ((error = copyin(SCARG(uap, uaddr),
		    &val, sizeof(val))) != 0) {
			FUTEX_SYSTEM_UNLOCK;
			return error;
		}

		if (val != SCARG(uap, val)) {
			FUTEX_SYSTEM_UNLOCK;
			return EWOULDBLOCK;
		}

		if (SCARG(uap, timeout) != NULL) {
			if ((error = copyin(SCARG(uap, timeout),
			    &timeout, sizeof(timeout))) != 0) {
				FUTEX_SYSTEM_UNLOCK;
				return error;
			}
		}

		FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, "
		    "*uaddr = %d, timeout = %lld.%09ld\n",
		    l->l_proc->p_pid, l->l_lid, SCARG(uap, val),
		    SCARG(uap, uaddr), val, (long long)timeout.tv_sec,
		    timeout.tv_nsec));

		linux_to_native_timespec(&ts, &timeout);
		if ((error = itimespecfix(&ts)) != 0) {
			FUTEX_SYSTEM_UNLOCK;
			return error;
		}
		timeout_hz = tstohz(&ts);

		/*
		 * If the user process requests a non-null timeout,
		 * make sure we do not turn it into an infinite
		 * timeout because timeout_hz is 0.
		 *
		 * We use a minimal timeout of 1/hz.  Maybe it would make
		 * sense to just return ETIMEDOUT without sleeping.
		 */
		if (SCARG(uap, timeout) != NULL && timeout_hz == 0)
			timeout_hz = 1;

		f = futex_get(SCARG(uap, uaddr), FUTEX_UNLOCKED);
		ret = futex_sleep(f, l, timeout_hz);
		futex_put(f);

		FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, "
		    "ret = %d\n", l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), ret));

		FUTEX_SYSTEM_UNLOCK;
		switch (ret) {
		case EWOULDBLOCK:	/* timeout */
			return ETIMEDOUT;
			break;
		case EINTR:		/* signal */
			return EINTR;
			break;
		case 0:			/* FUTEX_WAKE received */
			FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n",
			    l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr)));
			return 0;
			break;
		default:
			FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret));
			break;
		}

		/* NOTREACHED */
		break;

	case LINUX_FUTEX_WAKE:
		FUTEX_SYSTEM_LOCK;
		/*
		 * XXX: Linux is able to cope with different addresses
		 * corresponding to the same mapped memory in the sleeping
		 * and the waker process(es).
		 */
		FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n",
		    l->l_proc->p_pid, l->l_lid,
		    SCARG(uap, uaddr), SCARG(uap, val)));

		f = futex_get(SCARG(uap, uaddr), FUTEX_UNLOCKED);
		*retval = futex_wake(f, SCARG(uap, val), NULL, 0);
		futex_put(f);

		FUTEX_SYSTEM_UNLOCK;

		break;

	case LINUX_FUTEX_CMP_REQUEUE:
		FUTEX_SYSTEM_LOCK;

		if ((error = copyin(SCARG(uap, uaddr),
		    &val, sizeof(val))) != 0) {
			FUTEX_SYSTEM_UNLOCK;
			return error;
		}

		if (val != SCARG(uap, val3)) {
			FUTEX_SYSTEM_UNLOCK;
			return EAGAIN;
		}

		f = futex_get(SCARG(uap, uaddr), FUTEX_UNLOCKED);
		newf = futex_get(SCARG(uap, uaddr2), FUTEX_UNLOCKED);
		*retval = futex_wake(f, SCARG(uap, val), newf,
		    (int)(unsigned long)SCARG(uap, timeout));
		futex_put(f);
		futex_put(newf);

		FUTEX_SYSTEM_UNLOCK;
		break;

	case LINUX_FUTEX_REQUEUE:
		FUTEX_SYSTEM_LOCK;

		f = futex_get(SCARG(uap, uaddr), FUTEX_UNLOCKED);
		newf = futex_get(SCARG(uap, uaddr2), FUTEX_UNLOCKED);
		*retval = futex_wake(f, SCARG(uap, val), newf,
		    (int)(unsigned long)SCARG(uap, timeout));
		futex_put(f);
		futex_put(newf);

		FUTEX_SYSTEM_UNLOCK;
		break;

	case LINUX_FUTEX_FD:
		FUTEXPRINTF(("linux_sys_futex: unimplemented op %d\n",
		    SCARG(uap, op)));
		return ENOSYS;
	case LINUX_FUTEX_WAKE_OP:
		FUTEX_SYSTEM_LOCK;
		f = futex_get(SCARG(uap, uaddr), FUTEX_UNLOCKED);
		f2 = futex_get(SCARG(uap, uaddr2), FUTEX_UNLOCKED);
		/*
		 * futex_atomic_op() returns a non-negative comparison
		 * result on success and a negated errno on error.
		 */
		op_ret = futex_atomic_op(l, SCARG(uap, val3), SCARG(uap, uaddr2));
		if (op_ret < 0) {
			/* XXX: We don't handle EFAULT yet */
			if (op_ret != -EFAULT) {
				futex_put(f);
				futex_put(f2);
				FUTEX_SYSTEM_UNLOCK;
				return -op_ret;
			}
			futex_put(f);
			futex_put(f2);
			FUTEX_SYSTEM_UNLOCK;
			return EFAULT;
		}

		ret = futex_wake(f, SCARG(uap, val), NULL, 0);
		futex_put(f);
		if (op_ret > 0) {
			op_ret = 0;
			/*
			 * Linux passes the number of waiters to wake on
			 * uaddr2 (val2) in the timespec parameter slot.
			 */
			op_ret += futex_wake(f2,
			    (int)(unsigned long)SCARG(uap, timeout), NULL, 0);
			ret += op_ret;
		}
		futex_put(f2);
		*retval = ret;
		FUTEX_SYSTEM_UNLOCK;
		break;
	default:
		FUTEXPRINTF(("linux_sys_futex: unknown op %d\n",
		    SCARG(uap, op)));
		return ENOSYS;
	}
	return 0;
}

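/*
 * Look up (or create) the futex for the given user-space address and
 * take a reference on it.  'locked' says whether the caller already
 * holds futex_lock (FUTEX_LOCKED) or not (FUTEX_UNLOCKED).
 */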
static struct futex *
futex_get(void *uaddr, int locked)
{
	struct futex *f;

	if (locked == FUTEX_UNLOCKED)
		FUTEX_LOCK;

	LIST_FOREACH(f, &futex_list, f_list) {
		if (f->f_uaddr == uaddr) {
			f->f_refcount++;
			if (locked == FUTEX_UNLOCKED)
				FUTEX_UNLOCK;
			return f;
		}
	}

	/* Not found, create it */
	f = kmem_zalloc(sizeof(*f), KM_SLEEP);
	f->f_uaddr = uaddr;
	f->f_refcount = 1;
	TAILQ_INIT(&f->f_waiting_proc);
	LIST_INSERT_HEAD(&futex_list, f, f_list);
	if (locked == FUTEX_UNLOCKED)
		FUTEX_UNLOCK;

	return f;
}

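/*
 * Drop a reference on a futex; the entry is removed from the global
 * list and freed once the last reference goes away, at which point no
 * LWP may still be queued on it.
 */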
static void
futex_put(struct futex *f)
{

	FUTEX_LOCK;
	f->f_refcount--;
	if (f->f_refcount == 0) {
		KASSERT(TAILQ_EMPTY(&f->f_waiting_proc));
		LIST_REMOVE(f, f_list);
		kmem_free(f, sizeof(*f));
	}
	FUTEX_UNLOCK;

	return;
}

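/*
 * Put the calling LWP to sleep on the given futex, for at most 'timeout'
 * ticks (0 means no timeout).  Returns 0 when woken by futex_wake(), or
 * the error from cv_timedwait_sig() (EWOULDBLOCK on timeout, EINTR or
 * ERESTART on signal).  If the wakeup was a requeue, futex_wake() has
 * set wp_new_futex and we simply go back to sleep on that futex.
 */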
static int
futex_sleep(struct futex *f, lwp_t *l, unsigned long timeout)
{
	struct waiting_proc *wp;
	int ret;

	wp = kmem_zalloc(sizeof(*wp), KM_SLEEP);
	wp->wp_l = l;
	wp->wp_new_futex = NULL;
	cv_init(&wp->wp_futex_cv, "futex");

	FUTEX_LOCK;
	TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
	ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout);
	TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
	FUTEX_UNLOCK;

	/* if we got woken up in futex_wake and requeued */
	if ((ret == 0) && (wp->wp_new_futex != NULL)) {
		/* suspend us on the new futex */
		ret = futex_sleep(wp->wp_new_futex, l, timeout);
		/* and drop the reference futex_wake() took for us */
		futex_put(wp->wp_new_futex);
	}

	cv_destroy(&wp->wp_futex_cv);
	kmem_free(wp, sizeof(*wp));
	return ret;
}

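/*
 * Wake LWPs sleeping on futex 'f' while the running count is <= n.
 * When a requeue target 'newf' is given, surplus waiters are instead
 * moved onto it (they are signalled as well, so that futex_sleep()
 * notices wp_new_futex and re-sleeps on the new futex), with n2
 * limiting the requeue.  The return value is what linux_sys_futex()
 * reports back to user-space.
 */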
static int
futex_wake(struct futex *f, int n, struct futex *newf, int n2)
{
	struct waiting_proc *wp;
	int count;

	count = newf ? 0 : 1;

	FUTEX_LOCK;
	TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) {
		if (count <= n) {
			cv_signal(&wp->wp_futex_cv);
			count++;
		} else {
			if (newf == NULL)
				continue;
			/* futex_put() is called by the waiter in futex_sleep() */
			wp->wp_new_futex = futex_get(newf->f_uaddr,
			    FUTEX_LOCKED);
			cv_signal(&wp->wp_futex_cv);
			if (count - n >= n2)
				break;
		}
	}
	FUTEX_UNLOCK;

	return count;
}

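/*
 * FUTEX_WAKE_OP helper: atomically apply the encoded operation to the
 * user-space word at uaddr and evaluate the encoded comparison against
 * the old value.  The Linux encoding packs four fields into encoded_op:
 *	bits 28-31	operation (FUTEX_OP_SET/ADD/OR/ANDN/XOR); the top
 *			bit (FUTEX_OP_OPARG_SHIFT) requests that 1 << oparg
 *			be used as the operand
 *	bits 24-27	comparison (FUTEX_OP_CMP_*)
 *	bits 12-23	oparg, sign-extended
 *	bits  0-11	cmparg, sign-extended
 * The update is done with ucas_int() in a compare-and-swap loop.  Like
 * the Linux helper, this returns the non-negative comparison result on
 * success and a negated errno on failure.
 */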
static int
futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr)
{
	const int op = (encoded_op >> 28) & 7;
	const int cmp = (encoded_op >> 24) & 15;
	const int cmparg = (encoded_op << 20) >> 20;
	int oparg = (encoded_op << 8) >> 20;
	int error, oldval, cval;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

	/* XXX: linux verifies access here and returns EFAULT */

	if (copyin(uaddr, &cval, sizeof(int)) != 0)
		return -EFAULT;

	for (;;) {
		int nval;

		switch (op) {
		case FUTEX_OP_SET:
			nval = oparg;
			break;
		case FUTEX_OP_ADD:
			nval = cval + oparg;
			break;
		case FUTEX_OP_OR:
			nval = cval | oparg;
			break;
		case FUTEX_OP_ANDN:
			nval = cval & ~oparg;
			break;
		case FUTEX_OP_XOR:
			nval = cval ^ oparg;
			break;
		default:
			return -ENOSYS;
		}

		error = ucas_int(uaddr, cval, nval, &oldval);
		if (oldval == cval || error) {
			break;
		}
		cval = oldval;
	}

	if (error)
		return -EFAULT;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return (oldval == cmparg);
	case FUTEX_OP_CMP_NE:
		return (oldval != cmparg);
	case FUTEX_OP_CMP_LT:
		return (oldval < cmparg);
	case FUTEX_OP_CMP_GE:
		return (oldval >= cmparg);
	case FUTEX_OP_CMP_LE:
		return (oldval <= cmparg);
	case FUTEX_OP_CMP_GT:
		return (oldval > cmparg);
	default:
		return -ENOSYS;
	}
}

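/*
 * Robust futex support: user-space registers a per-thread list of held
 * futexes with set_robust_list(); when the process exits, the kernel
 * walks that list (release_futexes() below), marks each futex
 * FUTEX_OWNER_DIED and wakes a waiter so that other threads can recover
 * the lock.
 */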
int
linux_sys_set_robust_list(struct lwp *l,
    const struct linux_sys_set_robust_list_args *uap, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct linux_emuldata *led = p->p_emuldata;

	if (SCARG(uap, len) != sizeof(*(led->robust_futexes)))
		return EINVAL;
	led->robust_futexes = SCARG(uap, head);
	*retval = 0;
	return 0;
}

int
linux_sys_get_robust_list(struct lwp *l,
    const struct linux_sys_get_robust_list_args *uap, register_t *retval)
{
	struct linux_emuldata *led;
	struct linux_robust_list_head **head;
	size_t len = sizeof(*led->robust_futexes);
	int error = 0;

	if (!SCARG(uap, pid)) {
		led = l->l_proc->p_emuldata;
		head = &led->robust_futexes;
	} else {
		struct proc *p;

		mutex_enter(proc_lock);
		if ((p = p_find(SCARG(uap, pid), PFIND_LOCKED)) == NULL ||
		    p->p_emul != &emul_linux) {
			mutex_exit(proc_lock);
			return ESRCH;
		}
		led = p->p_emuldata;
		head = &led->robust_futexes;
		mutex_exit(proc_lock);
	}

	error = copyout(&len, SCARG(uap, len), sizeof(len));
	if (error)
		return error;
	return copyout(head, SCARG(uap, head), sizeof(*head));
}

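/*
 * Release one robust futex on behalf of the dead owner: if the futex
 * word still holds this pid as owner, set FUTEX_OWNER_DIED (keeping
 * FUTEX_WAITERS) with a compare-and-swap, retrying if the word changed
 * under us, and wake one waiter unless this is a PI futex.
 */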
static int
handle_futex_death(void *uaddr, pid_t pid, int pi)
{
	int uval, nval, mval;
	struct futex *f;

retry:
	if (copyin(uaddr, &uval, 4))
		return EFAULT;

	if ((uval & FUTEX_TID_MASK) == pid) {
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = atomic_cas_32(uaddr, uval, mval);

		if (nval == -1)
			return EFAULT;

		if (nval != uval)
			goto retry;

		if (!pi && (uval & FUTEX_WAITERS)) {
			f = futex_get(uaddr, FUTEX_UNLOCKED);
			futex_wake(f, 1, NULL, 0);
			futex_put(f);	/* drop the reference from futex_get() */
		}
	}

	return 0;
}

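/*
 * Fetch one robust-list entry pointer from user-space.  The low bit of
 * the pointer marks a PI futex; it is returned separately in *pi and
 * masked off the entry address.
 */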
static int
fetch_robust_entry(struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	unsigned long uentry;

	if (copyin((const void *)head, &uentry, sizeof(unsigned long)))
		return EFAULT;

	*entry = (void *)(uentry & ~1UL);
	*pi = uentry & 1;

	return 0;
}

/* This walks the list of robust futexes registered by the exiting process, releasing them. */
void
release_futexes(struct proc *p)
{
	struct linux_robust_list_head head;
	struct linux_robust_list *entry, *next_entry = NULL, *pending;
	unsigned int limit = 2048, pi, next_pi, pip;
	struct linux_emuldata *led;
	unsigned long futex_offset;
	int rc;

	led = p->p_emuldata;
	if (led->robust_futexes == NULL)
		return;

	if (copyin(led->robust_futexes, &head, sizeof(head)))
		return;

	if (fetch_robust_entry(&entry, &head.list.next, &pi))
		return;

	if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long)))
		return;

	if (fetch_robust_entry(&pending, &head.pending_list, &pip))
		return;

	while (entry != &head.list) {
		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);

		if (entry != pending)
			if (handle_futex_death((char *)entry + futex_offset,
			    p->p_pid, pi))
				return;

		if (rc)
			return;

		entry = next_entry;
		pi = next_pi;

		if (!--limit)
			break;

		yield();	/* XXX why? */
	}

	if (pending)
		handle_futex_death((char *)pending + futex_offset,
		    p->p_pid, pip);
}