initial commit with v2.6.9
[linux-2.6.9-moxart.git] / fs / pipe.c
blob2b42a25a414e465a1a3cb1cb2499dad7cfa0d794
1 /*
2 * linux/fs/pipe.c
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <asm/uaccess.h>
18 #include <asm/ioctls.h>
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
35 /* Drop the inode semaphore and wait for a pipe event, atomically */
36 void pipe_wait(struct inode * inode)
38 DEFINE_WAIT(wait);
40 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
41 up(PIPE_SEM(*inode));
42 schedule();
43 finish_wait(PIPE_WAIT(*inode), &wait);
44 down(PIPE_SEM(*inode));
47 static inline int
48 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
50 unsigned long copy;
52 while (len > 0) {
53 while (!iov->iov_len)
54 iov++;
55 copy = min_t(unsigned long, len, iov->iov_len);
57 if (copy_from_user(to, iov->iov_base, copy))
58 return -EFAULT;
59 to += copy;
60 len -= copy;
61 iov->iov_base += copy;
62 iov->iov_len -= copy;
64 return 0;
67 static inline int
68 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
70 unsigned long copy;
72 while (len > 0) {
73 while (!iov->iov_len)
74 iov++;
75 copy = min_t(unsigned long, len, iov->iov_len);
77 if (copy_to_user(iov->iov_base, from, copy))
78 return -EFAULT;
79 from += copy;
80 len -= copy;
81 iov->iov_base += copy;
82 iov->iov_len -= copy;
84 return 0;
87 static ssize_t
88 pipe_readv(struct file *filp, const struct iovec *_iov,
89 unsigned long nr_segs, loff_t *ppos)
91 struct inode *inode = filp->f_dentry->d_inode;
92 int do_wakeup;
93 ssize_t ret;
94 struct iovec *iov = (struct iovec *)_iov;
95 size_t total_len;
97 total_len = iov_length(iov, nr_segs);
98 /* Null read succeeds. */
99 if (unlikely(total_len == 0))
100 return 0;
102 do_wakeup = 0;
103 ret = 0;
104 down(PIPE_SEM(*inode));
105 for (;;) {
106 int size = PIPE_LEN(*inode);
107 if (size) {
108 char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
109 ssize_t chars = PIPE_MAX_RCHUNK(*inode);
111 if (chars > total_len)
112 chars = total_len;
113 if (chars > size)
114 chars = size;
116 if (pipe_iov_copy_to_user(iov, pipebuf, chars)) {
117 if (!ret) ret = -EFAULT;
118 break;
120 ret += chars;
122 PIPE_START(*inode) += chars;
123 PIPE_START(*inode) &= (PIPE_SIZE - 1);
124 PIPE_LEN(*inode) -= chars;
125 total_len -= chars;
126 do_wakeup = 1;
127 if (!total_len)
128 break; /* common path: read succeeded */
130 if (PIPE_LEN(*inode)) /* test for cyclic buffers */
131 continue;
132 if (!PIPE_WRITERS(*inode))
133 break;
134 if (!PIPE_WAITING_WRITERS(*inode)) {
135 /* syscall merging: Usually we must not sleep
136 * if O_NONBLOCK is set, or if we got some data.
137 * But if a writer sleeps in kernel space, then
138 * we can wait for that data without violating POSIX.
140 if (ret)
141 break;
142 if (filp->f_flags & O_NONBLOCK) {
143 ret = -EAGAIN;
144 break;
147 if (signal_pending(current)) {
148 if (!ret) ret = -ERESTARTSYS;
149 break;
151 if (do_wakeup) {
152 wake_up_interruptible_sync(PIPE_WAIT(*inode));
153 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
155 pipe_wait(inode);
157 up(PIPE_SEM(*inode));
158 /* Signal writers asynchronously that there is more room. */
159 if (do_wakeup) {
160 wake_up_interruptible(PIPE_WAIT(*inode));
161 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
163 if (ret > 0)
164 file_accessed(filp);
165 return ret;
168 static ssize_t
169 pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
171 struct iovec iov = { .iov_base = buf, .iov_len = count };
172 return pipe_readv(filp, &iov, 1, ppos);
175 static ssize_t
176 pipe_writev(struct file *filp, const struct iovec *_iov,
177 unsigned long nr_segs, loff_t *ppos)
179 struct inode *inode = filp->f_dentry->d_inode;
180 ssize_t ret;
181 size_t min;
182 int do_wakeup;
183 struct iovec *iov = (struct iovec *)_iov;
184 size_t total_len;
186 total_len = iov_length(iov, nr_segs);
187 /* Null write succeeds. */
188 if (unlikely(total_len == 0))
189 return 0;
191 do_wakeup = 0;
192 ret = 0;
193 min = total_len;
194 if (min > PIPE_BUF)
195 min = 1;
196 down(PIPE_SEM(*inode));
197 for (;;) {
198 int free;
199 if (!PIPE_READERS(*inode)) {
200 send_sig(SIGPIPE, current, 0);
201 if (!ret) ret = -EPIPE;
202 break;
204 free = PIPE_FREE(*inode);
205 if (free >= min) {
206 /* transfer data */
207 ssize_t chars = PIPE_MAX_WCHUNK(*inode);
208 char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
209 /* Always wakeup, even if the copy fails. Otherwise
210 * we lock up (O_NONBLOCK-)readers that sleep due to
211 * syscall merging.
213 do_wakeup = 1;
214 if (chars > total_len)
215 chars = total_len;
216 if (chars > free)
217 chars = free;
219 if (pipe_iov_copy_from_user(pipebuf, iov, chars)) {
220 if (!ret) ret = -EFAULT;
221 break;
223 ret += chars;
225 PIPE_LEN(*inode) += chars;
226 total_len -= chars;
227 if (!total_len)
228 break;
230 if (PIPE_FREE(*inode) && ret) {
231 /* handle cyclic data buffers */
232 min = 1;
233 continue;
235 if (filp->f_flags & O_NONBLOCK) {
236 if (!ret) ret = -EAGAIN;
237 break;
239 if (signal_pending(current)) {
240 if (!ret) ret = -ERESTARTSYS;
241 break;
243 if (do_wakeup) {
244 wake_up_interruptible_sync(PIPE_WAIT(*inode));
245 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
246 do_wakeup = 0;
248 PIPE_WAITING_WRITERS(*inode)++;
249 pipe_wait(inode);
250 PIPE_WAITING_WRITERS(*inode)--;
252 up(PIPE_SEM(*inode));
253 if (do_wakeup) {
254 wake_up_interruptible(PIPE_WAIT(*inode));
255 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
257 if (ret > 0)
258 inode_update_time(inode, 1); /* mtime and ctime */
259 return ret;
262 static ssize_t
263 pipe_write(struct file *filp, const char __user *buf,
264 size_t count, loff_t *ppos)
266 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
267 return pipe_writev(filp, &iov, 1, ppos);
270 static ssize_t
271 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
273 return -EBADF;
276 static ssize_t
277 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
279 return -EBADF;
282 static int
283 pipe_ioctl(struct inode *pino, struct file *filp,
284 unsigned int cmd, unsigned long arg)
286 switch (cmd) {
287 case FIONREAD:
288 return put_user(PIPE_LEN(*pino), (int __user *)arg);
289 default:
290 return -EINVAL;
294 /* No kernel lock held - fine */
295 static unsigned int
296 pipe_poll(struct file *filp, poll_table *wait)
298 unsigned int mask;
299 struct inode *inode = filp->f_dentry->d_inode;
301 poll_wait(filp, PIPE_WAIT(*inode), wait);
303 /* Reading only -- no need for acquiring the semaphore. */
304 mask = POLLIN | POLLRDNORM;
305 if (PIPE_EMPTY(*inode))
306 mask = POLLOUT | POLLWRNORM;
307 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
308 mask |= POLLHUP;
309 if (!PIPE_READERS(*inode))
310 mask |= POLLERR;
312 return mask;
315 /* FIXME: most Unices do not set POLLERR for fifos */
316 #define fifo_poll pipe_poll
318 static int
319 pipe_release(struct inode *inode, int decr, int decw)
321 down(PIPE_SEM(*inode));
322 PIPE_READERS(*inode) -= decr;
323 PIPE_WRITERS(*inode) -= decw;
324 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
325 struct pipe_inode_info *info = inode->i_pipe;
326 inode->i_pipe = NULL;
327 free_page((unsigned long) info->base);
328 kfree(info);
329 } else {
330 wake_up_interruptible(PIPE_WAIT(*inode));
331 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
332 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
334 up(PIPE_SEM(*inode));
336 return 0;
339 static int
340 pipe_read_fasync(int fd, struct file *filp, int on)
342 struct inode *inode = filp->f_dentry->d_inode;
343 int retval;
345 down(PIPE_SEM(*inode));
346 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
347 up(PIPE_SEM(*inode));
349 if (retval < 0)
350 return retval;
352 return 0;
356 static int
357 pipe_write_fasync(int fd, struct file *filp, int on)
359 struct inode *inode = filp->f_dentry->d_inode;
360 int retval;
362 down(PIPE_SEM(*inode));
363 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
364 up(PIPE_SEM(*inode));
366 if (retval < 0)
367 return retval;
369 return 0;
373 static int
374 pipe_rdwr_fasync(int fd, struct file *filp, int on)
376 struct inode *inode = filp->f_dentry->d_inode;
377 int retval;
379 down(PIPE_SEM(*inode));
381 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
383 if (retval >= 0)
384 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
386 up(PIPE_SEM(*inode));
388 if (retval < 0)
389 return retval;
391 return 0;
/*
 * Release of a read end: take the file off the readers' fasync list,
 * then drop one reader.
 */
static int pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);

	return pipe_release(inode, 1, 0);
}
/*
 * Release of a write end: take the file off the writers' fasync list,
 * then drop one writer.
 */
static int pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);

	return pipe_release(inode, 0, 1);
}
409 static int
410 pipe_rdwr_release(struct inode *inode, struct file *filp)
412 int decr, decw;
414 pipe_rdwr_fasync(-1, filp, 0);
415 decr = (filp->f_mode & FMODE_READ) != 0;
416 decw = (filp->f_mode & FMODE_WRITE) != 0;
417 return pipe_release(inode, decr, decw);
420 static int
421 pipe_read_open(struct inode *inode, struct file *filp)
423 /* We could have perhaps used atomic_t, but this and friends
424 below are the only places. So it doesn't seem worthwhile. */
425 down(PIPE_SEM(*inode));
426 PIPE_READERS(*inode)++;
427 up(PIPE_SEM(*inode));
429 return 0;
432 static int
433 pipe_write_open(struct inode *inode, struct file *filp)
435 down(PIPE_SEM(*inode));
436 PIPE_WRITERS(*inode)++;
437 up(PIPE_SEM(*inode));
439 return 0;
442 static int
443 pipe_rdwr_open(struct inode *inode, struct file *filp)
445 down(PIPE_SEM(*inode));
446 if (filp->f_mode & FMODE_READ)
447 PIPE_READERS(*inode)++;
448 if (filp->f_mode & FMODE_WRITE)
449 PIPE_WRITERS(*inode)++;
450 up(PIPE_SEM(*inode));
452 return 0;
456 * The file_operations structs are not static because they
457 * are also used in linux/fs/fifo.c to do operations on FIFOs.
459 struct file_operations read_fifo_fops = {
460 .llseek = no_llseek,
461 .read = pipe_read,
462 .readv = pipe_readv,
463 .write = bad_pipe_w,
464 .poll = fifo_poll,
465 .ioctl = pipe_ioctl,
466 .open = pipe_read_open,
467 .release = pipe_read_release,
468 .fasync = pipe_read_fasync,
471 struct file_operations write_fifo_fops = {
472 .llseek = no_llseek,
473 .read = bad_pipe_r,
474 .write = pipe_write,
475 .writev = pipe_writev,
476 .poll = fifo_poll,
477 .ioctl = pipe_ioctl,
478 .open = pipe_write_open,
479 .release = pipe_write_release,
480 .fasync = pipe_write_fasync,
483 struct file_operations rdwr_fifo_fops = {
484 .llseek = no_llseek,
485 .read = pipe_read,
486 .readv = pipe_readv,
487 .write = pipe_write,
488 .writev = pipe_writev,
489 .poll = fifo_poll,
490 .ioctl = pipe_ioctl,
491 .open = pipe_rdwr_open,
492 .release = pipe_rdwr_release,
493 .fasync = pipe_rdwr_fasync,
496 struct file_operations read_pipe_fops = {
497 .llseek = no_llseek,
498 .read = pipe_read,
499 .readv = pipe_readv,
500 .write = bad_pipe_w,
501 .poll = pipe_poll,
502 .ioctl = pipe_ioctl,
503 .open = pipe_read_open,
504 .release = pipe_read_release,
505 .fasync = pipe_read_fasync,
508 struct file_operations write_pipe_fops = {
509 .llseek = no_llseek,
510 .read = bad_pipe_r,
511 .write = pipe_write,
512 .writev = pipe_writev,
513 .poll = pipe_poll,
514 .ioctl = pipe_ioctl,
515 .open = pipe_write_open,
516 .release = pipe_write_release,
517 .fasync = pipe_write_fasync,
520 struct file_operations rdwr_pipe_fops = {
521 .llseek = no_llseek,
522 .read = pipe_read,
523 .readv = pipe_readv,
524 .write = pipe_write,
525 .writev = pipe_writev,
526 .poll = pipe_poll,
527 .ioctl = pipe_ioctl,
528 .open = pipe_rdwr_open,
529 .release = pipe_rdwr_release,
530 .fasync = pipe_rdwr_fasync,
533 struct inode* pipe_new(struct inode* inode)
535 unsigned long page;
537 page = __get_free_page(GFP_USER);
538 if (!page)
539 return NULL;
541 inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
542 if (!inode->i_pipe)
543 goto fail_page;
545 init_waitqueue_head(PIPE_WAIT(*inode));
546 PIPE_BASE(*inode) = (char*) page;
547 PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
548 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
549 PIPE_WAITING_WRITERS(*inode) = 0;
550 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
551 *PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;
553 return inode;
554 fail_page:
555 free_page(page);
556 return NULL;
559 static struct vfsmount *pipe_mnt;
560 static int pipefs_delete_dentry(struct dentry *dentry)
562 return 1;
564 static struct dentry_operations pipefs_dentry_operations = {
565 .d_delete = pipefs_delete_dentry,
568 static struct inode * get_pipe_inode(void)
570 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
572 if (!inode)
573 goto fail_inode;
575 if(!pipe_new(inode))
576 goto fail_iput;
577 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
578 inode->i_fop = &rdwr_pipe_fops;
581 * Mark the inode dirty from the very beginning,
582 * that way it will never be moved to the dirty
583 * list because "mark_inode_dirty()" will think
584 * that it already _is_ on the dirty list.
586 inode->i_state = I_DIRTY;
587 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
588 inode->i_uid = current->fsuid;
589 inode->i_gid = current->fsgid;
590 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
591 inode->i_blksize = PAGE_SIZE;
592 return inode;
594 fail_iput:
595 iput(inode);
596 fail_inode:
597 return NULL;
600 int do_pipe(int *fd)
602 struct qstr this;
603 char name[32];
604 struct dentry *dentry;
605 struct inode * inode;
606 struct file *f1, *f2;
607 int error;
608 int i,j;
610 error = -ENFILE;
611 f1 = get_empty_filp();
612 if (!f1)
613 goto no_files;
615 f2 = get_empty_filp();
616 if (!f2)
617 goto close_f1;
619 inode = get_pipe_inode();
620 if (!inode)
621 goto close_f12;
623 error = get_unused_fd();
624 if (error < 0)
625 goto close_f12_inode;
626 i = error;
628 error = get_unused_fd();
629 if (error < 0)
630 goto close_f12_inode_i;
631 j = error;
633 error = -ENOMEM;
634 sprintf(name, "[%lu]", inode->i_ino);
635 this.name = name;
636 this.len = strlen(name);
637 this.hash = inode->i_ino; /* will go */
638 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
639 if (!dentry)
640 goto close_f12_inode_i_j;
641 dentry->d_op = &pipefs_dentry_operations;
642 d_add(dentry, inode);
643 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
644 f1->f_dentry = f2->f_dentry = dget(dentry);
645 f1->f_mapping = f2->f_mapping = inode->i_mapping;
647 /* read file */
648 f1->f_pos = f2->f_pos = 0;
649 f1->f_flags = O_RDONLY;
650 f1->f_op = &read_pipe_fops;
651 f1->f_mode = FMODE_READ;
652 f1->f_version = 0;
654 /* write file */
655 f2->f_flags = O_WRONLY;
656 f2->f_op = &write_pipe_fops;
657 f2->f_mode = FMODE_WRITE;
658 f2->f_version = 0;
660 fd_install(i, f1);
661 fd_install(j, f2);
662 fd[0] = i;
663 fd[1] = j;
664 return 0;
666 close_f12_inode_i_j:
667 put_unused_fd(j);
668 close_f12_inode_i:
669 put_unused_fd(i);
670 close_f12_inode:
671 free_page((unsigned long) PIPE_BASE(*inode));
672 kfree(inode->i_pipe);
673 inode->i_pipe = NULL;
674 iput(inode);
675 close_f12:
676 put_filp(f2);
677 close_f1:
678 put_filp(f1);
679 no_files:
680 return error;
684 * pipefs should _never_ be mounted by userland - too much of security hassle,
685 * no real gain from having the whole whorehouse mounted. So we don't need
686 * any operations on the root directory. However, we need a non-trivial
687 * d_name - pipe: will go nicely and kill the special-casing in procfs.
690 static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
691 int flags, const char *dev_name, void *data)
693 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
696 static struct file_system_type pipe_fs_type = {
697 .name = "pipefs",
698 .get_sb = pipefs_get_sb,
699 .kill_sb = kill_anon_super,
702 static int __init init_pipe_fs(void)
704 int err = register_filesystem(&pipe_fs_type);
705 if (!err) {
706 pipe_mnt = kern_mount(&pipe_fs_type);
707 if (IS_ERR(pipe_mnt)) {
708 err = PTR_ERR(pipe_mnt);
709 unregister_filesystem(&pipe_fs_type);
712 return err;
715 static void __exit exit_pipe_fs(void)
717 unregister_filesystem(&pipe_fs_type);
718 mntput(pipe_mnt);
721 module_init(init_pipe_fs)
722 module_exit(exit_pipe_fs)