1 /* This file contains the heart of the mechanism used to read (and write)
2 * files. Read and write requests are split up into chunks that do not cross
3 * block boundaries. Each chunk is then processed in turn. Reads on special
4 * files are also detected and handled.
6 * The entry points into this file are
7 * do_read: perform the READ system call by calling read_write
8 * do_getdents: read entries from a directory (GETDENTS)
9 * read_write: actually do the work of READ and WRITE
14 #include <minix/callnr.h>
15 #include <minix/com.h>
16 #include <minix/u64.h>
17 #include <minix/vfsif.h>
19 #include <sys/dirent.h>
27 /*===========================================================================*
29 *===========================================================================*/
34 * This field is currently reserved for internal usage only, and must be set
35 * to zero by the caller. We may use it for future SA_RESTART support just
36 * like we are using it internally now.
/* READ entry point (listed in the file header as do_read). The signature
 * line of this routine is not part of this listing. Visible logic: the
 * cum_io field of the incoming request message is tested against zero (the
 * statement executed when it is nonzero is not shown here), and the call is
 * then forwarded to do_read_write_peek() with READING and the fd, buf and
 * len fields of the same message; that result is returned to the caller.
 */
38 if (job_m_in
.m_lc_vfs_readwrite
.cum_io
!= 0)
41 return(do_read_write_peek(READING
, job_m_in
.m_lc_vfs_readwrite
.fd
,
42 job_m_in
.m_lc_vfs_readwrite
.buf
, job_m_in
.m_lc_vfs_readwrite
.len
));
46 /*===========================================================================*
48 *===========================================================================*/
/* Acquire bsf_lock, the block special file lock named in the panic message
 * below. The signature line and several statements are missing from this
 * listing. Visible steps, in order: a mutex_trylock() on bsf_lock is compared
 * with 0; the current worker is suspended with worker_suspend() and the
 * returned thread pointer is kept in org_self; the lock is taken with
 * mutex_lock(), where a nonzero result leads to panic(); finally the saved
 * thread is handed to worker_resume().
 * NOTE(review): presumably a successful trylock returns right away without
 * the suspend/resume sequence - the statement under that test is not
 * visible here; confirm against the full source.
 */
51 struct worker_thread
*org_self
;
53 if (mutex_trylock(&bsf_lock
) == 0)
56 org_self
= worker_suspend();
58 if (mutex_lock(&bsf_lock
) != 0)
59 panic("unable to lock block special file lock");
61 worker_resume(org_self
);
64 /*===========================================================================*
66 *===========================================================================*/
/* Release bsf_lock with mutex_unlock(). A nonzero result is treated as fatal
 * and reported through panic(). The signature line of this routine is
 * missing from this listing.
 */
69 if (mutex_unlock(&bsf_lock
) != 0)
70 panic("failed to unlock block special file lock");
73 /*===========================================================================*
75 *===========================================================================*/
/* Sanity check on bsf_lock. The lock is probed with mutex_trylock() and the
 * result is stored in r; two panic() calls report the lock being held
 * ("bsf_lock locked") or the probe giving an unexpected result ("bsf_lock
 * weird state"). Takes no arguments and returns nothing.
 * NOTE(review): the tests on r that select between the two panics, and any
 * unlock after a successful probe, are not visible in this listing.
 */
76 void check_bsf_lock(void)
78 int r
= mutex_trylock(&bsf_lock
);
81 panic("bsf_lock locked");
83 panic("bsf_lock weird state");
89 /*===========================================================================*
90 * actual_read_write_peek *
91 *===========================================================================*/
/* Common front end for a read, write or peek request on a file descriptor.
 *   rfp      fproc entry handed to get_filp2() and read_write()
 *   rw_flag  requested operation; compared against WRITING below
 *   fd       file descriptor, resolved to a filp through get_filp2()
 *   buf      caller buffer address, passed through to read_write()
 *   nbytes   requested byte count, passed through to read_write()
 * Returns an int; the visible return yields 0, and the result of
 * read_write() is collected in r. Several statements, including the
 * declarations of f, r and ro and most return paths, are missing from this
 * listing.
 */
92 int actual_read_write_peek(struct fproc
*rfp
, int rw_flag
, int fd
,
93 vir_bytes buf
, size_t nbytes
)
95 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
97 tll_access_t locktype
;
101 if(rw_flag
== WRITING
) ro
= 0;
/* Writers ask for VNODE_WRITE access to the filp, everything else for
 * VNODE_READ.
 */
103 locktype
= rw_flag
== WRITING
? VNODE_WRITE
: VNODE_READ
;
104 if ((f
= get_filp2(rfp
, fd
, locktype
)) == NULL
)
107 assert(f
->filp_count
> 0);
/* The filp must have been opened with the matching permission: R_BIT while
 * ro is nonzero, W_BIT once ro has been cleared for WRITING.
 */
109 if (((f
->filp_mode
) & (ro
? R_BIT
: W_BIT
)) == 0) {
115 return(0); /* so char special files need not check for 0*/
118 r
= read_write(rfp
, rw_flag
, fd
, f
, buf
, nbytes
, who_e
);
124 /*===========================================================================*
125 * do_read_write_peek *
126 *===========================================================================*/
/* Thin wrapper around actual_read_write_peek(): the call is made with fp as
 * the fproc argument, and rw_flag, fd, buf and nbytes are passed through
 * unchanged. The result of that call is returned as is.
 * NOTE(review): fp is defined outside this listing - presumably the fproc
 * entry of the process whose request is being handled; confirm.
 */
127 int do_read_write_peek(int rw_flag
, int fd
, vir_bytes buf
, size_t nbytes
)
129 return actual_read_write_peek(fp
, rw_flag
, fd
, buf
, nbytes
);
132 /*===========================================================================*
134 *===========================================================================*/
/* Carry out a READING, WRITING or PEEKING request on an open file.
 *   rfp      fproc entry of the requester; its fp_endpoint is the target of
 *            the SIGPIPE sent at the end of this routine
 *   rw_flag  READING, WRITING or PEEKING (asserted below)
 *   fd       descriptor number, forwarded to rw_pipe()
 *   f        filp being accessed; supplies the start position (filp_pos)
 *            and the open flags (filp_flags)
 *   buf      caller buffer address
 *   size     byte count; a value above SSIZE_MAX yields EINVAL
 *   for_e    endpoint passed on to the pipe, device and file system requests
 * The mode of the vnode selects the path: pipes go to rw_pipe(), character
 * devices to cdev_io(), sockets to sdev_readwrite(), block devices to
 * req_bpeek() or req_breadwrite(), and the remaining case (regular files)
 * to req_peek() or req_readwrite(). PEEKING is only serviced on the block
 * device and regular file paths; the other three print a diagnostic.
 * Afterwards a write may grow v_size, the position is stored back in the
 * filp, and a write that ended in EPIPE raises SIGPIPE through sys_kill()
 * unless O_NOSIGPIPE is set in the filp flags.
 * NOTE(review): this listing omits many lines (braces, else branches,
 * several declarations and the return statements), so the exact nesting
 * and the returned value must be confirmed against the full source.
 */
135 int read_write(struct fproc
*rfp
, int rw_flag
, int fd
, struct filp
*f
,
136 vir_bytes buf
, size_t size
, endpoint_t for_e
)
138 register struct vnode
*vp
;
139 off_t position
, res_pos
;
140 size_t cum_io
, res_cum_io
;
145 position
= f
->filp_pos
;
150 assert(rw_flag
== READING
|| rw_flag
== WRITING
|| rw_flag
== PEEKING
);
/* Requests larger than SSIZE_MAX are refused with EINVAL. */
152 if (size
> SSIZE_MAX
) return(EINVAL
);
154 if (S_ISFIFO(vp
->v_mode
)) { /* Pipes */
155 if(rw_flag
== PEEKING
) {
156 printf("read_write: peek on pipe makes no sense\n");
160 op
= (rw_flag
== READING
? VFS_READ
: VFS_WRITE
);
161 r
= rw_pipe(rw_flag
, for_e
, f
, op
, fd
, buf
, size
, 0 /*cum_io*/);
162 } else if (S_ISCHR(vp
->v_mode
)) { /* Character special files. */
163 if(rw_flag
== PEEKING
) {
164 printf("read_write: peek on char device makes no sense\n");
168 if (vp
->v_sdev
== NO_DEV
)
169 panic("VFS: read_write tries to access char dev NO_DEV");
172 op
= (rw_flag
== READING
? CDEV_READ
: CDEV_WRITE
);
174 r
= cdev_io(op
, dev
, for_e
, buf
, position
, size
, f
->filp_flags
);
176 /* This should no longer happen: all calls are asynchronous. */
177 printf("VFS: I/O to device %llx succeeded immediately!?\n", dev
);
181 } else if (r
== SUSPEND
) {
182 /* FIXME: multiple read/write operations on a single filp
183 * should be serialized. They currently aren't; in order to
184 * achieve a similar effect, we optimistically advance the file
185 * position here. This works under the following assumptions:
186 * - character drivers that use the seek position at all,
187 * expose a view of a statically-sized range of bytes, i.e.,
188 * they are basically byte-granular block devices;
189 * - if short I/O or an error is returned, all subsequent calls
190 * will return (respectively) EOF and an error;
191 * - the application never checks its own file seek position,
192 * or does not care that it may end up having seeked beyond
193 * the number of bytes it has actually read;
194 * - communication to the character driver is FIFO (this one
195 * is actually true! whew).
196 * Many improvements are possible here, but in the end,
197 * anything short of queuing concurrent operations will be
198 * suboptimal - so we settle for this hack for now.
202 } else if (S_ISSOCK(vp
->v_mode
)) {
203 if (rw_flag
== PEEKING
) {
204 printf("VFS: read_write tries to peek on sock dev\n");
208 if (vp
->v_sdev
== NO_DEV
)
209 panic("VFS: read_write tries to access sock dev NO_DEV");
211 r
= sdev_readwrite(vp
->v_sdev
, buf
, size
, 0, 0, 0, 0, 0, rw_flag
,
213 } else if (S_ISBLK(vp
->v_mode
)) { /* Block special files. */
214 if (vp
->v_sdev
== NO_DEV
)
215 panic("VFS: read_write tries to access block dev NO_DEV");
219 if(rw_flag
== PEEKING
) {
220 r
= req_bpeek(vp
->v_bfs_e
, vp
->v_sdev
, position
, size
);
222 r
= req_breadwrite(vp
->v_bfs_e
, for_e
, vp
->v_sdev
, position
,
223 size
, buf
, rw_flag
, &res_pos
, &res_cum_io
);
226 cum_io
+= res_cum_io
;
231 } else { /* Regular files */
232 if (rw_flag
== WRITING
) {
233 /* Check for O_APPEND flag. */
234 if (f
->filp_flags
& O_APPEND
) position
= vp
->v_size
;
238 if(rw_flag
== PEEKING
) {
239 r
= req_peek(vp
->v_fs_e
, vp
->v_inode_nr
, position
, size
);
242 r
= req_readwrite(vp
->v_fs_e
, vp
->v_inode_nr
, position
,
243 rw_flag
, for_e
, buf
, size
, &new_pos
,
248 cum_io
+= cum_io_incr
;
253 /* On write, update file size and access time. */
254 if (rw_flag
== WRITING
) {
255 if (S_ISREG(vp
->v_mode
) || S_ISDIR(vp
->v_mode
)) {
256 if (position
> vp
->v_size
) {
257 vp
->v_size
= position
;
/* Store the resulting position back into the filp. */
262 f
->filp_pos
= position
;
264 if (r
== EPIPE
&& rw_flag
== WRITING
) {
265 /* Process is writing, but there is no reader. Tell the kernel to
266 * generate a SIGPIPE signal.
268 if (!(f
->filp_flags
& O_NOSIGPIPE
)) {
269 sys_kill(rfp
->fp_endpoint
, SIGPIPE
);
279 /*===========================================================================*
281 *===========================================================================*/
/* Handler for the getdents(fd, buf, size) call. Takes no arguments: fd, buf
 * and size are read from the fd, buf and len fields of the request message,
 * whose cum_io field is first tested against zero. The filp is looked up
 * with get_filp() under VNODE_READ, its mode must include R_BIT and its
 * vnode must be a directory, and the entries are then requested from the
 * file system with req_getdents(). Returns an int.
 * NOTE(review): the statements executed when a check fails, and the final
 * return, are not visible in this listing; confirm the error codes against
 * the full source.
 */
282 int do_getdents(void)
284 /* Perform the getdents(fd, buf, size) system call. */
289 register struct filp
*rfilp
;
291 /* This field must always be set to zero for getdents(). */
292 if (job_m_in
.m_lc_vfs_readwrite
.cum_io
!= 0)
295 fd
= job_m_in
.m_lc_vfs_readwrite
.fd
;
296 buf
= job_m_in
.m_lc_vfs_readwrite
.buf
;
297 size
= job_m_in
.m_lc_vfs_readwrite
.len
;
299 /* Is the file descriptor valid? */
300 if ( (rfilp
= get_filp(fd
, VNODE_READ
)) == NULL
)
303 if (!(rfilp
->filp_mode
& R_BIT
))
305 else if (!S_ISDIR(rfilp
->filp_vno
->v_mode
))
/* Ask the file system for entries, starting at the current filp position. */
309 r
= req_getdents(rfilp
->filp_vno
->v_fs_e
, rfilp
->filp_vno
->v_inode_nr
,
310 rfilp
->filp_pos
, buf
, size
, &new_pos
, 0);
/* Only a positive result advances the filp position to new_pos. */
312 if (r
> 0) rfilp
->filp_pos
= new_pos
;
320 /*===========================================================================*
322 *===========================================================================*/
323 int rw_pipe(int rw_flag
, endpoint_t usr_e
, struct filp
*f
, int callnr
, int fd
,
324 vir_bytes buf
, size_t nbytes
, size_t cum_io
)
326 int r
, oflags
, partial_pipe
= FALSE
;
330 off_t position
, new_pos
;
332 /* Must make sure we're operating on locked filp and vnode */
333 assert(tll_locked_by_me(&f
->filp_vno
->v_lock
));
334 assert(mutex_trylock(&f
->filp_lock
) == -EDEADLK
);
336 oflags
= f
->filp_flags
;
338 position
= 0; /* Not actually used */
340 assert(rw_flag
== READING
|| rw_flag
== WRITING
);
342 r
= pipe_check(f
, rw_flag
, oflags
, nbytes
, 0);
345 pipe_suspend(callnr
, fd
, buf
, nbytes
, cum_io
);
347 /* If pipe_check returns an error instead of suspending the call, we
348 * return that error, even if we are resuming a partially completed
349 * operation (ie, a large blocking write), to match NetBSD's behavior.
355 if (size
< nbytes
) partial_pipe
= TRUE
;
357 /* Truncate read request at size. */
358 if (rw_flag
== READING
&& size
> vp
->v_size
) {
362 if (vp
->v_mapfs_e
== 0)
363 panic("unmapped pipe");
365 r
= req_readwrite(vp
->v_mapfs_e
, vp
->v_mapinode_nr
, position
, rw_flag
, usr_e
,
366 buf
, size
, &new_pos
, &cum_io_incr
);
369 assert(r
!= SUSPEND
);
373 cum_io
+= cum_io_incr
;
375 nbytes
-= cum_io_incr
;
377 if (rw_flag
== READING
)
378 vp
->v_size
-= cum_io_incr
;
380 vp
->v_size
+= cum_io_incr
;
383 /* partial write on pipe with */
384 /* O_NONBLOCK, return write count */
385 if (!(oflags
& O_NONBLOCK
)) {
386 /* partial write on pipe with nbytes > PIPE_BUF, non-atomic */
387 pipe_suspend(callnr
, fd
, buf
, nbytes
, cum_io
);