4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
14 #include <linux/mount.h>
15 #include <linux/pipe_fs_i.h>
16 #include <linux/uio.h>
17 #include <asm/uaccess.h>
18 #include <asm/ioctls.h>
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
35 /* Drop the inode semaphore and wait for a pipe event, atomically */
36 void pipe_wait(struct inode
* inode
)
40 prepare_to_wait(PIPE_WAIT(*inode
), &wait
, TASK_INTERRUPTIBLE
);
43 finish_wait(PIPE_WAIT(*inode
), &wait
);
44 down(PIPE_SEM(*inode
));
48 pipe_iov_copy_from_user(void *to
, struct iovec
*iov
, unsigned long len
)
55 copy
= min_t(unsigned long, len
, iov
->iov_len
);
57 if (copy_from_user(to
, iov
->iov_base
, copy
))
61 iov
->iov_base
+= copy
;
68 pipe_iov_copy_to_user(struct iovec
*iov
, const void *from
, unsigned long len
)
75 copy
= min_t(unsigned long, len
, iov
->iov_len
);
77 if (copy_to_user(iov
->iov_base
, from
, copy
))
81 iov
->iov_base
+= copy
;
88 pipe_readv(struct file
*filp
, const struct iovec
*_iov
,
89 unsigned long nr_segs
, loff_t
*ppos
)
91 struct inode
*inode
= filp
->f_dentry
->d_inode
;
94 struct iovec
*iov
= (struct iovec
*)_iov
;
97 total_len
= iov_length(iov
, nr_segs
);
98 /* Null read succeeds. */
99 if (unlikely(total_len
== 0))
104 down(PIPE_SEM(*inode
));
106 int size
= PIPE_LEN(*inode
);
108 char *pipebuf
= PIPE_BASE(*inode
) + PIPE_START(*inode
);
109 ssize_t chars
= PIPE_MAX_RCHUNK(*inode
);
111 if (chars
> total_len
)
116 if (pipe_iov_copy_to_user(iov
, pipebuf
, chars
)) {
117 if (!ret
) ret
= -EFAULT
;
122 PIPE_START(*inode
) += chars
;
123 PIPE_START(*inode
) &= (PIPE_SIZE
- 1);
124 PIPE_LEN(*inode
) -= chars
;
128 break; /* common path: read succeeded */
130 if (PIPE_LEN(*inode
)) /* test for cyclic buffers */
132 if (!PIPE_WRITERS(*inode
))
134 if (!PIPE_WAITING_WRITERS(*inode
)) {
135 /* syscall merging: Usually we must not sleep
136 * if O_NONBLOCK is set, or if we got some data.
137 * But if a writer sleeps in kernel space, then
138 * we can wait for that data without violating POSIX.
142 if (filp
->f_flags
& O_NONBLOCK
) {
147 if (signal_pending(current
)) {
148 if (!ret
) ret
= -ERESTARTSYS
;
152 wake_up_interruptible_sync(PIPE_WAIT(*inode
));
153 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
157 up(PIPE_SEM(*inode
));
158 /* Signal writers asynchronously that there is more room. */
160 wake_up_interruptible(PIPE_WAIT(*inode
));
161 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
169 pipe_read(struct file
*filp
, char __user
*buf
, size_t count
, loff_t
*ppos
)
171 struct iovec iov
= { .iov_base
= buf
, .iov_len
= count
};
172 return pipe_readv(filp
, &iov
, 1, ppos
);
176 pipe_writev(struct file
*filp
, const struct iovec
*_iov
,
177 unsigned long nr_segs
, loff_t
*ppos
)
179 struct inode
*inode
= filp
->f_dentry
->d_inode
;
183 struct iovec
*iov
= (struct iovec
*)_iov
;
186 total_len
= iov_length(iov
, nr_segs
);
187 /* Null write succeeds. */
188 if (unlikely(total_len
== 0))
196 down(PIPE_SEM(*inode
));
199 if (!PIPE_READERS(*inode
)) {
200 send_sig(SIGPIPE
, current
, 0);
201 if (!ret
) ret
= -EPIPE
;
204 free
= PIPE_FREE(*inode
);
207 ssize_t chars
= PIPE_MAX_WCHUNK(*inode
);
208 char *pipebuf
= PIPE_BASE(*inode
) + PIPE_END(*inode
);
209 /* Always wakeup, even if the copy fails. Otherwise
210 * we lock up (O_NONBLOCK-)readers that sleep due to
214 if (chars
> total_len
)
219 if (pipe_iov_copy_from_user(pipebuf
, iov
, chars
)) {
220 if (!ret
) ret
= -EFAULT
;
225 PIPE_LEN(*inode
) += chars
;
230 if (PIPE_FREE(*inode
) && ret
) {
231 /* handle cyclic data buffers */
235 if (filp
->f_flags
& O_NONBLOCK
) {
236 if (!ret
) ret
= -EAGAIN
;
239 if (signal_pending(current
)) {
240 if (!ret
) ret
= -ERESTARTSYS
;
244 wake_up_interruptible_sync(PIPE_WAIT(*inode
));
245 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
248 PIPE_WAITING_WRITERS(*inode
)++;
250 PIPE_WAITING_WRITERS(*inode
)--;
252 up(PIPE_SEM(*inode
));
254 wake_up_interruptible(PIPE_WAIT(*inode
));
255 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
258 inode_update_time(inode
, 1); /* mtime and ctime */
263 pipe_write(struct file
*filp
, const char __user
*buf
,
264 size_t count
, loff_t
*ppos
)
266 struct iovec iov
= { .iov_base
= (void __user
*)buf
, .iov_len
= count
};
267 return pipe_writev(filp
, &iov
, 1, ppos
);
271 bad_pipe_r(struct file
*filp
, char __user
*buf
, size_t count
, loff_t
*ppos
)
277 bad_pipe_w(struct file
*filp
, const char __user
*buf
, size_t count
, loff_t
*ppos
)
283 pipe_ioctl(struct inode
*pino
, struct file
*filp
,
284 unsigned int cmd
, unsigned long arg
)
288 return put_user(PIPE_LEN(*pino
), (int __user
*)arg
);
294 /* No kernel lock held - fine */
296 pipe_poll(struct file
*filp
, poll_table
*wait
)
299 struct inode
*inode
= filp
->f_dentry
->d_inode
;
301 poll_wait(filp
, PIPE_WAIT(*inode
), wait
);
303 /* Reading only -- no need for acquiring the semaphore. */
304 mask
= POLLIN
| POLLRDNORM
;
305 if (PIPE_EMPTY(*inode
))
306 mask
= POLLOUT
| POLLWRNORM
;
307 if (!PIPE_WRITERS(*inode
) && filp
->f_version
!= PIPE_WCOUNTER(*inode
))
309 if (!PIPE_READERS(*inode
))
315 /* FIXME: most Unices do not set POLLERR for fifos */
316 #define fifo_poll pipe_poll
319 pipe_release(struct inode
*inode
, int decr
, int decw
)
321 down(PIPE_SEM(*inode
));
322 PIPE_READERS(*inode
) -= decr
;
323 PIPE_WRITERS(*inode
) -= decw
;
324 if (!PIPE_READERS(*inode
) && !PIPE_WRITERS(*inode
)) {
325 struct pipe_inode_info
*info
= inode
->i_pipe
;
326 inode
->i_pipe
= NULL
;
327 free_page((unsigned long) info
->base
);
330 wake_up_interruptible(PIPE_WAIT(*inode
));
331 kill_fasync(PIPE_FASYNC_READERS(*inode
), SIGIO
, POLL_IN
);
332 kill_fasync(PIPE_FASYNC_WRITERS(*inode
), SIGIO
, POLL_OUT
);
334 up(PIPE_SEM(*inode
));
340 pipe_read_fasync(int fd
, struct file
*filp
, int on
)
342 struct inode
*inode
= filp
->f_dentry
->d_inode
;
345 down(PIPE_SEM(*inode
));
346 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_READERS(*inode
));
347 up(PIPE_SEM(*inode
));
357 pipe_write_fasync(int fd
, struct file
*filp
, int on
)
359 struct inode
*inode
= filp
->f_dentry
->d_inode
;
362 down(PIPE_SEM(*inode
));
363 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_WRITERS(*inode
));
364 up(PIPE_SEM(*inode
));
374 pipe_rdwr_fasync(int fd
, struct file
*filp
, int on
)
376 struct inode
*inode
= filp
->f_dentry
->d_inode
;
379 down(PIPE_SEM(*inode
));
381 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_READERS(*inode
));
384 retval
= fasync_helper(fd
, filp
, on
, PIPE_FASYNC_WRITERS(*inode
));
386 up(PIPE_SEM(*inode
));
/*
 * release() for a read-only pipe end: detach @filp from the readers'
 * fasync list, then drop one reader.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int status;

	pipe_read_fasync(-1, filp, 0);
	status = pipe_release(inode, 1, 0);

	return status;
}
/*
 * release() for a write-only pipe end: detach @filp from the writers'
 * fasync list, then drop one writer.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int status;

	pipe_write_fasync(-1, filp, 0);
	status = pipe_release(inode, 0, 1);

	return status;
}
410 pipe_rdwr_release(struct inode
*inode
, struct file
*filp
)
414 pipe_rdwr_fasync(-1, filp
, 0);
415 decr
= (filp
->f_mode
& FMODE_READ
) != 0;
416 decw
= (filp
->f_mode
& FMODE_WRITE
) != 0;
417 return pipe_release(inode
, decr
, decw
);
421 pipe_read_open(struct inode
*inode
, struct file
*filp
)
423 /* We could have perhaps used atomic_t, but this and friends
424 below are the only places. So it doesn't seem worthwhile. */
425 down(PIPE_SEM(*inode
));
426 PIPE_READERS(*inode
)++;
427 up(PIPE_SEM(*inode
));
433 pipe_write_open(struct inode
*inode
, struct file
*filp
)
435 down(PIPE_SEM(*inode
));
436 PIPE_WRITERS(*inode
)++;
437 up(PIPE_SEM(*inode
));
443 pipe_rdwr_open(struct inode
*inode
, struct file
*filp
)
445 down(PIPE_SEM(*inode
));
446 if (filp
->f_mode
& FMODE_READ
)
447 PIPE_READERS(*inode
)++;
448 if (filp
->f_mode
& FMODE_WRITE
)
449 PIPE_WRITERS(*inode
)++;
450 up(PIPE_SEM(*inode
));
456 * The file_operations structs are not static because they
457 * are also used in linux/fs/fifo.c to do operations on FIFOs.
459 struct file_operations read_fifo_fops
= {
466 .open
= pipe_read_open
,
467 .release
= pipe_read_release
,
468 .fasync
= pipe_read_fasync
,
471 struct file_operations write_fifo_fops
= {
475 .writev
= pipe_writev
,
478 .open
= pipe_write_open
,
479 .release
= pipe_write_release
,
480 .fasync
= pipe_write_fasync
,
483 struct file_operations rdwr_fifo_fops
= {
488 .writev
= pipe_writev
,
491 .open
= pipe_rdwr_open
,
492 .release
= pipe_rdwr_release
,
493 .fasync
= pipe_rdwr_fasync
,
496 struct file_operations read_pipe_fops
= {
503 .open
= pipe_read_open
,
504 .release
= pipe_read_release
,
505 .fasync
= pipe_read_fasync
,
508 struct file_operations write_pipe_fops
= {
512 .writev
= pipe_writev
,
515 .open
= pipe_write_open
,
516 .release
= pipe_write_release
,
517 .fasync
= pipe_write_fasync
,
520 struct file_operations rdwr_pipe_fops
= {
525 .writev
= pipe_writev
,
528 .open
= pipe_rdwr_open
,
529 .release
= pipe_rdwr_release
,
530 .fasync
= pipe_rdwr_fasync
,
533 struct inode
* pipe_new(struct inode
* inode
)
537 page
= __get_free_page(GFP_USER
);
541 inode
->i_pipe
= kmalloc(sizeof(struct pipe_inode_info
), GFP_KERNEL
);
545 init_waitqueue_head(PIPE_WAIT(*inode
));
546 PIPE_BASE(*inode
) = (char*) page
;
547 PIPE_START(*inode
) = PIPE_LEN(*inode
) = 0;
548 PIPE_READERS(*inode
) = PIPE_WRITERS(*inode
) = 0;
549 PIPE_WAITING_WRITERS(*inode
) = 0;
550 PIPE_RCOUNTER(*inode
) = PIPE_WCOUNTER(*inode
) = 1;
551 *PIPE_FASYNC_READERS(*inode
) = *PIPE_FASYNC_WRITERS(*inode
) = NULL
;
/* Kernel-internal mount of pipefs; assigned in init_pipe_fs(). */
static struct vfsmount *pipe_mnt;

/*
 * d_delete hook for pipefs dentries.
 *
 * NOTE(review): the body is not visible; `return 1` is restored.
 * Confirm against the reference tree.
 */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
564 static struct dentry_operations pipefs_dentry_operations
= {
565 .d_delete
= pipefs_delete_dentry
,
568 static struct inode
* get_pipe_inode(void)
570 struct inode
*inode
= new_inode(pipe_mnt
->mnt_sb
);
577 PIPE_READERS(*inode
) = PIPE_WRITERS(*inode
) = 1;
578 inode
->i_fop
= &rdwr_pipe_fops
;
581 * Mark the inode dirty from the very beginning,
582 * that way it will never be moved to the dirty
583 * list because "mark_inode_dirty()" will think
584 * that it already _is_ on the dirty list.
586 inode
->i_state
= I_DIRTY
;
587 inode
->i_mode
= S_IFIFO
| S_IRUSR
| S_IWUSR
;
588 inode
->i_uid
= current
->fsuid
;
589 inode
->i_gid
= current
->fsgid
;
590 inode
->i_atime
= inode
->i_mtime
= inode
->i_ctime
= CURRENT_TIME
;
591 inode
->i_blksize
= PAGE_SIZE
;
604 struct dentry
*dentry
;
605 struct inode
* inode
;
606 struct file
*f1
, *f2
;
611 f1
= get_empty_filp();
615 f2
= get_empty_filp();
619 inode
= get_pipe_inode();
623 error
= get_unused_fd();
625 goto close_f12_inode
;
628 error
= get_unused_fd();
630 goto close_f12_inode_i
;
634 sprintf(name
, "[%lu]", inode
->i_ino
);
636 this.len
= strlen(name
);
637 this.hash
= inode
->i_ino
; /* will go */
638 dentry
= d_alloc(pipe_mnt
->mnt_sb
->s_root
, &this);
640 goto close_f12_inode_i_j
;
641 dentry
->d_op
= &pipefs_dentry_operations
;
642 d_add(dentry
, inode
);
643 f1
->f_vfsmnt
= f2
->f_vfsmnt
= mntget(mntget(pipe_mnt
));
644 f1
->f_dentry
= f2
->f_dentry
= dget(dentry
);
645 f1
->f_mapping
= f2
->f_mapping
= inode
->i_mapping
;
648 f1
->f_pos
= f2
->f_pos
= 0;
649 f1
->f_flags
= O_RDONLY
;
650 f1
->f_op
= &read_pipe_fops
;
651 f1
->f_mode
= FMODE_READ
;
655 f2
->f_flags
= O_WRONLY
;
656 f2
->f_op
= &write_pipe_fops
;
657 f2
->f_mode
= FMODE_WRITE
;
671 free_page((unsigned long) PIPE_BASE(*inode
));
672 kfree(inode
->i_pipe
);
673 inode
->i_pipe
= NULL
;
684 * pipefs should _never_ be mounted by userland - too much of security hassle,
685 * no real gain from having the whole whorehouse mounted. So we don't need
686 * any operations on the root directory. However, we need a non-trivial
687 * d_name - pipe: will go nicely and kill the special-casing in procfs.
690 static struct super_block
*pipefs_get_sb(struct file_system_type
*fs_type
,
691 int flags
, const char *dev_name
, void *data
)
693 return get_sb_pseudo(fs_type
, "pipe:", NULL
, PIPEFS_MAGIC
);
696 static struct file_system_type pipe_fs_type
= {
698 .get_sb
= pipefs_get_sb
,
699 .kill_sb
= kill_anon_super
,
702 static int __init
init_pipe_fs(void)
704 int err
= register_filesystem(&pipe_fs_type
);
706 pipe_mnt
= kern_mount(&pipe_fs_type
);
707 if (IS_ERR(pipe_mnt
)) {
708 err
= PTR_ERR(pipe_mnt
);
709 unregister_filesystem(&pipe_fs_type
);
715 static void __exit
exit_pipe_fs(void)
717 unregister_filesystem(&pipe_fs_type
);
721 module_init(init_pipe_fs
)
722 module_exit(exit_pipe_fs
)