make vfs & filesystems use failable copying
[minix3.git] / servers / vfs / read.c
blob36c91019b3e9722d963faa2fcae5cda8ebd48865
/* This file contains the heart of the mechanism used to read (and write)
 * files. Read and write requests are split up into chunks that do not cross
 * block boundaries. Each chunk is then processed in turn. Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	  perform the READ system call by calling read_write
 *   do_getdents: read entries from a directory (GETDENTS)
 *   read_write:  actually do the work of READ and WRITE
 */
13 #include "fs.h"
14 #include <minix/callnr.h>
15 #include <minix/com.h>
16 #include <minix/u64.h>
17 #include <minix/vfsif.h>
18 #include <assert.h>
19 #include <sys/dirent.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include "file.h"
23 #include "scratchpad.h"
24 #include "vnode.h"
25 #include "vmnt.h"
28 /*===========================================================================*
29 * do_read *
30 *===========================================================================*/
31 int do_read(void)
33 return(do_read_write_peek(READING, job_m_in.VFS_READWRITE_FD,
34 job_m_in.VFS_READWRITE_BUF, (size_t) job_m_in.VFS_READWRITE_LEN));
38 /*===========================================================================*
39 * lock_bsf *
40 *===========================================================================*/
41 void lock_bsf(void)
43 struct worker_thread *org_self;
45 if (mutex_trylock(&bsf_lock) == 0)
46 return;
48 org_self = worker_suspend();
50 if (mutex_lock(&bsf_lock) != 0)
51 panic("unable to lock block special file lock");
53 worker_resume(org_self);
56 /*===========================================================================*
57 * unlock_bsf *
58 *===========================================================================*/
59 void unlock_bsf(void)
61 if (mutex_unlock(&bsf_lock) != 0)
62 panic("failed to unlock block special file lock");
65 /*===========================================================================*
66 * check_bsf *
67 *===========================================================================*/
68 void check_bsf_lock(void)
70 int r = mutex_trylock(&bsf_lock);
72 if (r == -EBUSY)
73 panic("bsf_lock locked");
74 else if (r != 0)
75 panic("bsf_lock weird state");
77 /* r == 0 */
78 unlock_bsf();
81 /*===========================================================================*
82 * actual_read_write_peek *
83 *===========================================================================*/
84 int actual_read_write_peek(struct fproc *rfp, int rw_flag, int io_fd,
85 char *io_buf, size_t io_nbytes)
87 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
88 struct filp *f;
89 tll_access_t locktype;
90 int r;
91 int ro = 1;
93 if(rw_flag == WRITING) ro = 0;
95 scratch(rfp).file.fd_nr = io_fd;
96 scratch(rfp).io.io_buffer = io_buf;
97 scratch(rfp).io.io_nbytes = io_nbytes;
99 locktype = rw_flag == WRITING ? VNODE_WRITE : VNODE_READ;
100 if ((f = get_filp2(rfp, scratch(rfp).file.fd_nr, locktype)) == NULL)
101 return(err_code);
103 assert(f->filp_count > 0);
105 if (((f->filp_mode) & (ro ? R_BIT : W_BIT)) == 0) {
106 unlock_filp(f);
107 return(EBADF);
109 if (scratch(rfp).io.io_nbytes == 0) {
110 unlock_filp(f);
111 return(0); /* so char special files need not check for 0*/
114 r = read_write(rfp, rw_flag, f, scratch(rfp).io.io_buffer,
115 scratch(rfp).io.io_nbytes, who_e);
117 unlock_filp(f);
118 return(r);
121 /*===========================================================================*
122 * do_read_write_peek *
123 *===========================================================================*/
124 int do_read_write_peek(int rw_flag, int io_fd, char *io_buf, size_t io_nbytes)
126 return actual_read_write_peek(fp, rw_flag, io_fd, io_buf, io_nbytes);
129 /*===========================================================================*
130 * read_write *
131 *===========================================================================*/
132 int read_write(struct fproc *rfp, int rw_flag, struct filp *f,
133 char *buf, size_t size, endpoint_t for_e)
135 register struct vnode *vp;
136 off_t position, res_pos;
137 unsigned int cum_io, cum_io_incr, res_cum_io;
138 int op, r;
139 dev_t dev;
141 position = f->filp_pos;
142 vp = f->filp_vno;
143 r = OK;
144 cum_io = 0;
146 assert(rw_flag == READING || rw_flag == WRITING || rw_flag == PEEKING);
148 if (size > SSIZE_MAX) return(EINVAL);
150 op = (rw_flag == READING ? CDEV_READ : CDEV_WRITE);
152 if (S_ISFIFO(vp->v_mode)) { /* Pipes */
153 if (rfp->fp_cum_io_partial != 0) {
154 panic("VFS: read_write: fp_cum_io_partial not clear");
156 if(rw_flag == PEEKING) {
157 printf("read_write: peek on pipe makes no sense\n");
158 return EINVAL;
160 r = rw_pipe(rw_flag, for_e, f, buf, size);
161 } else if (S_ISCHR(vp->v_mode)) { /* Character special files. */
162 if(rw_flag == PEEKING) {
163 printf("read_write: peek on char device makes no sense\n");
164 return EINVAL;
167 if (vp->v_sdev == NO_DEV)
168 panic("VFS: read_write tries to access char dev NO_DEV");
170 dev = vp->v_sdev;
172 r = cdev_io(op, dev, for_e, buf, position, size, f->filp_flags);
173 if (r >= 0) {
174 /* This should no longer happen: all calls are asynchronous. */
175 printf("VFS: I/O to device %llx succeeded immediately!?\n", dev);
176 cum_io = r;
177 position += r;
178 r = OK;
179 } else if (r == SUSPEND) {
180 /* FIXME: multiple read/write operations on a single filp
181 * should be serialized. They currently aren't; in order to
182 * achieve a similar effect, we optimistically advance the file
183 * position here. This works under the following assumptions:
184 * - character drivers that use the seek position at all,
185 * expose a view of a statically-sized range of bytes, i.e.,
186 * they are basically byte-granular block devices;
187 * - if short I/O or an error is returned, all subsequent calls
188 * will return (respectively) EOF and an error;
189 * - the application never checks its own file seek position,
190 * or does not care that it may end up having seeked beyond
191 * the number of bytes it has actually read;
192 * - communication to the character driver is FIFO (this one
193 * is actually true! whew).
194 * Many improvements are possible here, but in the end,
195 * anything short of queuing concurrent operations will be
196 * suboptimal - so we settle for this hack for now.
198 position += size;
200 } else if (S_ISBLK(vp->v_mode)) { /* Block special files. */
201 if (vp->v_sdev == NO_DEV)
202 panic("VFS: read_write tries to access block dev NO_DEV");
204 lock_bsf();
206 if(rw_flag == PEEKING) {
207 r = req_bpeek(vp->v_bfs_e, vp->v_sdev, position, size);
208 } else {
209 r = req_breadwrite(vp->v_bfs_e, for_e, vp->v_sdev, position,
210 size, (vir_bytes) buf, rw_flag, &res_pos, &res_cum_io);
211 if (r == OK) {
212 position = res_pos;
213 cum_io += res_cum_io;
217 unlock_bsf();
218 } else { /* Regular files */
219 if (rw_flag == WRITING) {
220 /* Check for O_APPEND flag. */
221 if (f->filp_flags & O_APPEND) position = vp->v_size;
224 /* Issue request */
225 if(rw_flag == PEEKING) {
226 r = req_peek(vp->v_fs_e, vp->v_inode_nr, position, size);
227 } else {
228 off_t new_pos;
229 r = req_readwrite(vp->v_fs_e, vp->v_inode_nr, position,
230 rw_flag, for_e, (vir_bytes) buf, size, &new_pos,
231 &cum_io_incr);
233 if (r >= 0) {
234 position = new_pos;
235 cum_io += cum_io_incr;
240 /* On write, update file size and access time. */
241 if (rw_flag == WRITING) {
242 if (S_ISREG(vp->v_mode) || S_ISDIR(vp->v_mode)) {
243 if (position > vp->v_size) {
244 vp->v_size = position;
249 f->filp_pos = position;
251 if (r == EPIPE && rw_flag == WRITING) {
252 /* Process is writing, but there is no reader. Tell the kernel to
253 * generate s SIGPIPE signal.
255 if (!(f->filp_flags & O_NOSIGPIPE)) {
256 sys_kill(rfp->fp_endpoint, SIGPIPE);
260 if (r == OK) {
261 return(cum_io);
263 return(r);
266 /*===========================================================================*
267 * do_getdents *
268 *===========================================================================*/
269 int do_getdents(void)
271 /* Perform the getdents(fd, buf, size) system call. */
272 int r = OK;
273 off_t new_pos;
274 register struct filp *rfilp;
276 scratch(fp).file.fd_nr = job_m_in.VFS_READWRITE_FD;
277 scratch(fp).io.io_buffer = job_m_in.VFS_READWRITE_BUF;
278 scratch(fp).io.io_nbytes = (size_t) job_m_in.VFS_READWRITE_LEN;
280 /* Is the file descriptor valid? */
281 if ( (rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
282 return(err_code);
284 if (!(rfilp->filp_mode & R_BIT))
285 r = EBADF;
286 else if (!S_ISDIR(rfilp->filp_vno->v_mode))
287 r = EBADF;
289 if (r == OK) {
290 r = req_getdents(rfilp->filp_vno->v_fs_e, rfilp->filp_vno->v_inode_nr,
291 rfilp->filp_pos, scratch(fp).io.io_buffer,
292 scratch(fp).io.io_nbytes, &new_pos, 0);
294 if (r > 0) rfilp->filp_pos = new_pos;
297 unlock_filp(rfilp);
298 return(r);
302 /*===========================================================================*
303 * rw_pipe *
304 *===========================================================================*/
305 int rw_pipe(rw_flag, usr_e, f, buf, req_size)
306 int rw_flag; /* READING or WRITING */
307 endpoint_t usr_e;
308 struct filp *f;
309 char *buf;
310 size_t req_size;
312 int r, oflags, partial_pipe = 0;
313 size_t size, cum_io, cum_io_incr;
314 struct vnode *vp;
315 off_t position, new_pos;
317 /* Must make sure we're operating on locked filp and vnode */
318 assert(tll_locked_by_me(&f->filp_vno->v_lock));
319 assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
321 oflags = f->filp_flags;
322 vp = f->filp_vno;
323 position = 0; /* Not actually used */
325 assert(rw_flag == READING || rw_flag == WRITING);
327 /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
328 cum_io = fp->fp_cum_io_partial;
330 r = pipe_check(f, rw_flag, oflags, req_size, 0);
331 if (r <= 0) {
332 if (r == SUSPEND) pipe_suspend(f, buf, req_size);
333 return(r);
336 size = r;
337 if (size < req_size) partial_pipe = 1;
339 /* Truncate read request at size. */
340 if (rw_flag == READING && size > vp->v_size) {
341 size = vp->v_size;
344 if (vp->v_mapfs_e == 0)
345 panic("unmapped pipe");
347 r = req_readwrite(vp->v_mapfs_e, vp->v_mapinode_nr, position, rw_flag, usr_e,
348 (vir_bytes) buf, size, &new_pos, &cum_io_incr);
350 if (r != OK) {
351 return(r);
354 cum_io += cum_io_incr;
355 buf += cum_io_incr;
356 req_size -= cum_io_incr;
358 vp->v_size = new_pos;
360 if (partial_pipe) {
361 /* partial write on pipe with */
362 /* O_NONBLOCK, return write count */
363 if (!(oflags & O_NONBLOCK)) {
364 /* partial write on pipe with req_size > PIPE_SIZE,
365 * non-atomic
367 fp->fp_cum_io_partial = cum_io;
368 pipe_suspend(f, buf, req_size);
369 return(SUSPEND);
373 fp->fp_cum_io_partial = 0;
375 return(cum_io);