9 #include <sys/dirent.h>
/* Forward declaration of the read-ahead helper defined later in this file;
 * rw_chunk() calls it to obtain the buffer for a block that is being read.
 */
13 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
14 position
, unsigned bytes_ahead
);
/* Forward declaration of the single-block transfer helper that
 * fs_readwrite() calls for each chunk of a transfer.
 */
15 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
16 size_t chunk
, unsigned left
, int call
, struct fsdriver_data
*data
,
17 unsigned buf_off
, unsigned int block_size
, int *completed
);
20 /*===========================================================================*
 *				fs_readwrite				     *
22 *===========================================================================*/
/* Transfer up to 'nrbytes' bytes between the file with inode number 'ino_nr'
 * and the caller's buffer 'data', starting at file offset 'position'.
 *
 * 'call' selects the direction: FSC_WRITE is tested explicitly below, and
 * every other value takes the read-style path that is limited by the current
 * file size.  The transfer is cut into chunks that never cross a block
 * boundary and each chunk is handed to rw_chunk().
 *
 * For writes the visible code tests for a read-only file system and for
 * growth beyond s_max_size before any data is moved, and calls clear_zone()
 * when the write starts beyond the current end of file.  A write that ends
 * past the old end of file updates i_size (regular files and directories).
 * Unless the file system is read-only, the time stamp flags are set (ATIME
 * for FSC_READ, CTIME and MTIME for FSC_WRITE) and the inode is marked dirty.
 *
 * NOTE(review): the statements that run when a check fails and the final
 * return value are not part of this listing; judging by the ssize_t return
 * type and the cum_io counter the result is presumably the byte count or a
 * negative error code -- confirm against the complete source.
 */
23 ssize_t
fs_readwrite(ino_t ino_nr
, struct fsdriver_data
*data
, size_t nrbytes
,
24 off_t position
, int call
)
28 off_t f_size
, bytes_left
;
29 size_t off
, cum_io
, block_size
, chunk
;
36 /* Find the inode referred */
37 if ((rip
= find_inode(fs_dev
, ino_nr
)) == NULL
)
40 mode_word
= rip
->i_mode
& I_TYPE
;
41 regular
= (mode_word
== I_REGULAR
);
43 /* Determine blocksize */
44 block_size
= rip
->i_sp
->s_block_size
;
47 /* If this is file i/o, check we can write */
48 if (call
== FSC_WRITE
) {
49 if(rip
->i_sp
->s_rd_only
)
52 /* Check in advance to see if file will grow too big. */
53 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
56 /* Clear the zone containing present EOF if hole about
57 * to be created. This is necessary because all unwritten
58 * blocks prior to the EOF must read as zeros.
60 if(position
> f_size
) clear_zone(rip
, f_size
, 0);
64 /* Split the transfer into chunks that don't span two blocks. */
/* NOTE(review): position is narrowed to unsigned int before the modulo.
 * That equals the full-width remainder only when block_size divides
 * UINT_MAX + 1 (for instance a power of two) -- confirm.
 */
66 off
= ((unsigned int) position
) % block_size
; /* offset in blk*/
67 chunk
= block_size
- off
;
71 if (call
!= FSC_WRITE
) {
72 bytes_left
= f_size
- position
;
73 if (position
>= f_size
) break; /* we are beyond EOF */
74 if (chunk
> (unsigned int) bytes_left
) chunk
= bytes_left
;
77 /* Read or write 'chunk' bytes. */
/* NOTE(review): position goes through unsigned long on its way to u64_t;
 * where unsigned long is narrower than off_t the upper bits are dropped --
 * confirm this is intended.
 */
78 r
= rw_chunk(rip
, ((u64_t
)((unsigned long)position
)), off
, chunk
,
79 nrbytes
, call
, data
, cum_io
, block_size
, &completed
);
83 /* Update counters and pointers. */
84 nrbytes
-= chunk
; /* bytes yet to be read */
85 cum_io
+= chunk
; /* bytes read so far */
86 position
+= (off_t
) chunk
; /* position within the file */
89 /* On write, update file size and access time. */
90 if (call
== FSC_WRITE
) {
91 if (regular
|| mode_word
== I_DIRECTORY
) {
92 if (position
> f_size
) rip
->i_size
= position
;
96 rip
->i_seek
= NO_SEEK
;
101 /* even on a ROFS, writing to a device node on it is fine,
102 * just don't update the inode stats for it. And ditto for reading.
104 if (!rip
->i_sp
->s_rd_only
) {
105 if (call
== FSC_READ
) rip
->i_update
|= ATIME
;
106 if (call
== FSC_WRITE
) rip
->i_update
|= CTIME
| MTIME
;
107 IN_MARKDIRTY(rip
); /* inode is thus now dirty */
114 /*===========================================================================*
 *				rw_chunk				     *
116 *===========================================================================*/
/* Transfer one chunk that lies entirely inside a single block of file 'rip'.
 *
 * The block number is looked up with read_map().  The visible branches then
 * distinguish three situations, following the existing comments:
 *  - the block does not exist: a read is answered with zeros through
 *    fsdriver_zero(), a peek reports a zero block through
 *    lmfs_zero_block_ino(), and a write allocates a block with new_block();
 *  - the block exists and the call is not a write: it is fetched through
 *    rahead(), which may also prefetch;
 *  - the block exists and the call is a write: it is fetched with
 *    lmfs_get_block_ino(), using NO_READ instead of NORMAL when the chunk
 *    covers the whole block.
 * Finally FSC_READ copies from the buffer to the caller with
 * fsdriver_copyout() and FSC_WRITE copies into it with fsdriver_copyin().
 *
 * The parameters are described next to their declarations below.
 * NOTE(review): the conditions that guard several of these branches, the
 * use of '*completed' and the return statement are missing from this
 * listing -- consult the complete source before relying on details.
 */
117 static int rw_chunk(rip
, position
, off
, chunk
, left
, call
, data
, buf_off
,
118 block_size
, completed
)
119 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
120 u64_t position
; /* position within file to read or write */
121 unsigned off
; /* off within the current block */
122 size_t chunk
; /* number of bytes to read or write */
123 unsigned left
; /* max number of bytes wanted after position */
124 int call
; /* FSC_READ, FSC_WRITE, or FSC_PEEK */
125 struct fsdriver_data
*data
; /* structure for (remote) user buffer */
126 unsigned buf_off
; /* offset in user buffer */
127 unsigned int block_size
; /* block size of FS operating on */
128 int *completed
; /* number of bytes copied */
130 /* Read or write (part of) a block. */
131 struct buf
*bp
= NULL
;
136 ino_t ino
= VMC_NO_INODE
;
137 u64_t ino_off
= rounddown(position
, block_size
);
/* NOTE(review): presumably ex64hi()/ex64lo() yield the upper and lower
 * halves of the 64-bit position, so only offsets that fit in the lower
 * half are accepted here -- confirm.
 */
141 if (ex64hi(position
) != 0)
142 panic("rw_chunk: position too high");
143 b
= read_map(rip
, (off_t
) ex64lo(position
), 0);
146 assert(ino
!= VMC_NO_INODE
);
149 if (call
== FSC_READ
) {
150 /* Reading from a nonexistent block. Must read as all zeros.*/
151 r
= fsdriver_zero(data
, buf_off
, chunk
);
153 printf("MFS: fsdriver_zero failed\n");
156 } else if (call
== FSC_PEEK
) {
157 /* Peeking a nonexistent block. Report to VM. */
158 lmfs_zero_block_ino(dev
, ino
, ino_off
);
161 /* Writing to a nonexistent block.
162 * Create and enter in inode.
164 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
167 } else if (call
!= FSC_WRITE
) {
168 /* Read and read ahead if convenient. */
169 bp
= rahead(rip
, b
, position
, left
);
171 /* Normally an existing block to be partially overwritten is first read
172 * in. However, a full block need not be read in. If it is already in
173 * the cache, acquire it, otherwise just acquire a free buffer.
175 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
176 if (off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
178 assert(ino
!= VMC_NO_INODE
);
179 assert(!(ino_off
% block_size
));
180 if ((r
= lmfs_get_block_ino(&bp
, dev
, b
, n
, ino
, ino_off
)) != OK
)
181 panic("MFS: error getting block (%llu,%u): %d", dev
, b
, r
);
184 /* In all cases, bp now points to a valid buffer. */
187 if (call
== FSC_WRITE
&& chunk
!= block_size
&&
188 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
192 if (call
== FSC_READ
) {
193 /* Copy a chunk from the block buffer to user space. */
194 r
= fsdriver_copyout(data
, buf_off
, b_data(bp
)+off
, chunk
);
195 } else if (call
== FSC_WRITE
) {
196 /* Copy a chunk from user space to the block buffer. */
197 r
= fsdriver_copyin(data
, buf_off
, b_data(bp
)+off
, chunk
);
207 /*===========================================================================*
 *				read_map				     *
209 *===========================================================================*/
/* Translate a byte position within file 'rip' into the number of the block
 * that holds it.
 *
 * The position is first turned into a zone number plus a block offset
 * inside that zone.  The zone pointer is then taken from one of three
 * places: the inode's own i_zone[] array, the single indirect block
 * referenced by i_zone[i_ndzones], or a single indirect block reached
 * through the double indirect block referenced by i_zone[i_ndzones + 1].
 *
 * Returns NO_BLOCK when a zone pointer on the path is NO_ZONE, when the
 * position lies beyond the double indirect range, or on the paths marked
 * "peeking failed"; otherwise the block number (z << scale) + boff.
 * When 'opportunistic' is nonzero, indirect blocks are requested in PEEK
 * mode instead of NORMAL mode.
 */
210 block_t
read_map(rip
, position
, opportunistic
)
211 register struct inode
*rip
; /* ptr to inode to map from */
212 off_t position
; /* position in file whose blk wanted */
213 int opportunistic
; /* if nonzero, only use cache for metadata */
215 /* Given an inode and a position within the corresponding file, locate the
216 * block (not zone) number in which that position is to be found and return it.
221 int scale
, boff
, index
, zind
;
222 unsigned int dzones
, nr_indirects
;
224 unsigned long excess
, zone
, block_pos
;
227 iomode
= opportunistic
? PEEK
: NORMAL
;
229 scale
= rip
->i_sp
->s_log_zone_size
; /* for block-zone conversion */
230 block_pos
= position
/rip
->i_sp
->s_block_size
; /* relative blk # in file */
231 zone
= block_pos
>> scale
; /* position's zone */
232 boff
= (int) (block_pos
- (zone
<< scale
) ); /* relative blk # within zone */
233 dzones
= rip
->i_ndzones
;
234 nr_indirects
= rip
->i_nindirs
;
236 /* Is 'position' to be found in the inode itself? */
238 zind
= (int) zone
; /* index should be an int */
239 z
= rip
->i_zone
[zind
];
240 if (z
== NO_ZONE
) return(NO_BLOCK
);
241 b
= (block_t
) ((z
<< scale
) + boff
);
245 /* It is not in the inode, so it must be single or double indirect. */
246 excess
= zone
- dzones
; /* first Vx_NR_DZONES don't count */
248 if (excess
< nr_indirects
) {
249 /* 'position' can be located via the single indirect block. */
250 z
= rip
->i_zone
[dzones
];
252 /* 'position' can be located via the double indirect block. */
253 if ( (z
= rip
->i_zone
[dzones
+1]) == NO_ZONE
) return(NO_BLOCK
);
254 excess
-= nr_indirects
; /* single indir doesn't count*/
255 b
= (block_t
) z
<< scale
;
256 ASSERT(rip
->i_dev
!= NO_DEV
);
257 index
= (int) (excess
/nr_indirects
);
/* NOTE(review): index == i_nindirs passes the test below and is handed to
 * rd_indir().  If an indirect block holds i_nindirs entries, that slot is
 * one past the end -- confirm whether '>=' is the intended comparison.
 */
258 if ((unsigned int) index
> rip
->i_nindirs
)
259 return(NO_BLOCK
); /* Can't go beyond double indirects */
260 bp
= get_block(rip
->i_dev
, b
, iomode
); /* get double indirect block */
262 return NO_BLOCK
; /* peeking failed */
263 z
= rd_indir(bp
, index
); /* z= zone for single*/
264 put_block(bp
); /* release double ind block */
265 excess
= excess
% nr_indirects
; /* index into single ind blk */
268 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
269 if (z
== NO_ZONE
) return(NO_BLOCK
);
270 b
= (block_t
) z
<< scale
; /* b is blk # for single ind */
271 bp
= get_block(rip
->i_dev
, b
, iomode
); /* get single indirect block */
273 return NO_BLOCK
; /* peeking failed */
274 z
= rd_indir(bp
, (int) excess
); /* get block pointed to */
275 put_block(bp
); /* release single indir blk */
276 if (z
== NO_ZONE
) return(NO_BLOCK
);
277 b
= (block_t
) ((z
<< scale
) + boff
);
/* Return the cache buffer for the block of file 'rip' that contains byte
 * offset 'position'.  The block number comes from read_map(); the position
 * is rounded down to a block boundary and passed, together with the inode
 * number, to lmfs_get_block_ino().  A failure of that call is fatal (panic).
 *
 * NOTE(review): read_map() declares its position parameter as off_t, so the
 * u64_t value is converted at that call.  The handling of a NO_BLOCK result
 * is not visible in this listing -- see the complete source.
 */
281 struct buf
*get_block_map(register struct inode
*rip
, u64_t position
)
285 block_t b
= read_map(rip
, position
, 0); /* get block number */
288 block_size
= get_block_size(rip
->i_dev
);
289 position
= rounddown(position
, block_size
);
290 assert(rip
->i_num
!= VMC_NO_INODE
);
291 if ((r
= lmfs_get_block_ino(&bp
, rip
->i_dev
, b
, NORMAL
, rip
->i_num
,
293 panic("MFS: error getting block (%llu,%u): %d",
298 /*===========================================================================*
 *				rd_indir				     *
300 *===========================================================================*/
/* Fetch the zone number stored at slot 'index' of the indirect block held
 * in 'bp'.  The raw entry is converted with conv4() according to the super
 * block's s_native flag.  A value other than NO_ZONE that lies outside
 * [s_firstdatazone, s_zones) is reported with printf() and followed by a
 * panic.  The code asserts that the file system version is V3.
 *
 * NOTE(review): how 'sp' is obtained, the test in front of the
 * "rd_indir() on NULL" panic and the return statement are missing from this
 * listing; presumably the converted zone number is returned.
 */
301 zone_t
rd_indir(bp
, index
)
302 struct buf
*bp
; /* pointer to indirect block */
303 int index
; /* index into *bp */
305 struct super_block
*sp
;
309 panic("rd_indir() on NULL");
313 /* read a zone from an indirect block */
314 assert(sp
->s_version
== V3
);
315 zone
= (zone_t
) conv4(sp
->s_native
, (long) b_v2_ind(bp
)[index
]);
317 if (zone
!= NO_ZONE
&&
318 (zone
< (zone_t
) sp
->s_firstdatazone
|| zone
>= sp
->s_zones
)) {
319 printf("Illegal zone number %ld in indirect block, index %d\n",
321 panic("check file system");
327 /*===========================================================================*
 *				rahead					     *
329 *===========================================================================*/
/* Return the buffer for 'baseblock', the block of file 'rip' at 'position',
 * and use the occasion to queue further blocks of the file for prefetching.
 *
 * Outline of the visible code:
 *  - align position down to a block boundary and convert bytes_ahead into a
 *    count of blocks;
 *  - probe the cache for the block in PEEK mode;
 *  - bound the number of blocks to prefetch: at least BLOCKS_MINIMUM when
 *    the inode's i_seek flag equals NO_SEEK, at most the blocks left up to
 *    i_size, and at most LMFS_MAX_PREFETCH;
 *  - collect block numbers in the static read_q[] array, looking up each
 *    following block with read_map() in opportunistic mode, and stop at a
 *    block that is already cached;
 *  - hand the queue to lmfs_prefetch() and fetch 'baseblock' in NORMAL mode.
 *
 * NOTE(review): read_q[] has static storage, so concurrent or nested calls
 * would share it -- confirm that callers never overlap.  Many statements
 * (loop headers, result checks, the return) are absent from this listing;
 * the outline above covers only what can be seen here.
 */
330 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
331 register struct inode
*rip
; /* pointer to inode for file to be read */
332 block_t baseblock
; /* block at current position */
333 u64_t position
; /* position within file */
334 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
336 /* Fetch a block from the cache or the device. If a physical read is
337 * required, prefetch as many more blocks as convenient into the cache.
338 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
339 * The device driver may decide it knows better and stop reading at a
340 * cylinder boundary (or after an error). Rw_scattered() puts an optional
341 * flag on all reads to allow this.
343 /* Minimum number of blocks to prefetch. */
344 # define BLOCKS_MINIMUM 32
345 int r
, scale
, read_q_size
;
346 unsigned int blocks_ahead
, fragment
, block_size
;
347 block_t block
, blocks_left
;
351 static block64_t read_q
[LMFS_MAX_PREFETCH
];
352 u64_t position_running
;
355 assert(dev
!= NO_DEV
);
357 block_size
= get_block_size(dev
);
361 fragment
= position
% block_size
;
362 position
-= fragment
;
363 position_running
= position
;
364 bytes_ahead
+= fragment
;
365 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
367 r
= lmfs_get_block_ino(&bp
, dev
, block
, PEEK
, rip
->i_num
, position
);
371 panic("MFS: error getting block (%llu,%u): %d", dev
, block
, r
);
373 /* The best guess for the number of blocks to prefetch: A lot.
374 * It is impossible to tell what the device looks like, so we don't even
375 * try to guess the geometry, but leave it to the driver.
377 * The floppy driver can read a full track with no rotational delay, and it
378 * avoids reading partial tracks if it can, so handing it enough buffers to
379 * read two tracks is perfect. (Two, because some diskette types have
380 * an odd number of sectors per track, so a block may span tracks.)
382 * The disk drivers don't try to be smart. With today's disks it is
383 * impossible to tell what the real geometry looks like, so it is best to
384 * read as much as you can. With luck the caching on the drive allows
385 * for a little time to start the next read.
387 * The current solution below is a bit of a hack, it just reads blocks from
388 * the current file position hoping that more of the file can be found. A
389 * better solution must look at the already available zone pointers and
390 * indirect blocks (but don't call read_map!).
393 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
396 /* Go for the first indirect block if we are in its neighborhood. */
397 scale
= rip
->i_sp
->s_log_zone_size
;
398 ind1_pos
= (off_t
) rip
->i_ndzones
* (block_size
<< scale
);
399 if ((off_t
) ex64lo(position
) <= ind1_pos
&& rip
->i_size
> ind1_pos
) {
404 /* Read at least the minimum number of blocks, but not after a seek. */
405 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
406 blocks_ahead
= BLOCKS_MINIMUM
;
408 /* Can't go past end of file. */
409 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
411 /* No more than the maximum request. */
412 if (blocks_ahead
> LMFS_MAX_PREFETCH
) blocks_ahead
= LMFS_MAX_PREFETCH
;
416 /* Acquire block buffers. */
419 read_q
[read_q_size
++] = block
;
421 if (--blocks_ahead
== 0) break;
424 position_running
+= block_size
;
426 thisblock
= read_map(rip
, (off_t
) ex64lo(position_running
), 1);
427 if (thisblock
!= NO_BLOCK
) {
428 r
= lmfs_get_block_ino(&bp
, dev
, thisblock
, PEEK
, rip
->i_num
,
432 r
= lmfs_get_block(&bp
, dev
, block
, PEEK
);
435 /* Oops, block already in the cache, get out. */
440 panic("MFS: error getting block (%llu,%u): %d", dev
, block
, r
);
442 lmfs_prefetch(dev
, read_q
, read_q_size
);
444 r
= lmfs_get_block_ino(&bp
, dev
, baseblock
, NORMAL
, rip
->i_num
, position
);
446 panic("MFS: error getting block (%llu,%u): %d", dev
, baseblock
, r
);
451 /*===========================================================================*
 *				fs_getdents				     *
453 *===========================================================================*/
/* Produce entries of the directory with inode number 'ino_nr', storing up to
 * 'bytes' bytes in the caller's buffer 'data' by way of the
 * fsdriver_dentry_* helpers and the static staging buffer getdents_buf.
 *
 * The starting position 'pos' must be a multiple of DIR_ENTRY_SIZE.
 * Directory blocks are walked from the block containing that position up to
 * i_size.  Slots whose mfs_d_ino is 0 are skipped.  For every other slot the
 * name length is found with memchr() (bounded by the size of mfs_d_name),
 * the entry's position in the directory is computed, and the entry is added
 * with fsdriver_dentry_add().  After fsdriver_dentry_finish() succeeds the
 * ATIME flag is set unless the file system is read-only, and the directory
 * inode is released with put_inode().
 *
 * NOTE(review): the remainder of the parameter list, the way 'pos' is
 * initialised, the failure paths and the return value are missing from this
 * listing -- consult the complete source.
 */
454 ssize_t
fs_getdents(ino_t ino_nr
, struct fsdriver_data
*data
, size_t bytes
,
457 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1)
458 #define GETDENTS_ENTRIES 8
459 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
460 struct fsdriver_dentry fsdentry
;
461 struct inode
*rip
, *entrip
;
463 unsigned int block_size
, len
, type
;
464 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
469 /* Check whether the position is properly aligned */
471 if( (unsigned int) pos
% DIR_ENTRY_SIZE
)
474 if( (rip
= get_inode(fs_dev
, ino_nr
)) == NULL
)
477 block_size
= rip
->i_sp
->s_block_size
;
478 off
= (pos
% block_size
); /* Offset in block */
479 block_pos
= pos
- off
;
480 done
= FALSE
; /* Stop processing directory blocks when done is set */
482 fsdriver_dentry_init(&fsdentry
, data
, bytes
, getdents_buf
,
483 sizeof(getdents_buf
));
485 /* The default position for the next request is EOF. If the user's buffer
486 * fills up before EOF, new_pos will be modified. */
487 new_pos
= rip
->i_size
;
491 for(; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
492 /* Since directories don't have holes, 'bp' cannot be NULL. */
493 bp
= get_block_map(rip
, block_pos
); /* get a dir block */
496 /* Search a directory block. */
498 dp
= &b_dir(bp
)[off
/ DIR_ENTRY_SIZE
];
501 for (; dp
< &b_dir(bp
)[NR_DIR_ENTRIES(block_size
)]; dp
++) {
502 if (dp
->mfs_d_ino
== 0)
503 continue; /* Entry is not in use */
505 /* Compute the length of the name */
506 cp
= memchr(dp
->mfs_d_name
, '\0', sizeof(dp
->mfs_d_name
));
508 len
= sizeof(dp
->mfs_d_name
);
510 len
= cp
- (dp
->mfs_d_name
);
512 /* Need the position of this entry in the directory */
513 ent_pos
= block_pos
+ ((char *) dp
- (char *) bp
->data
);
515 /* We also need(?) the file type of the target inode. */
/* NOTE(review): a comment further down calls fetching the mode from the
 * inode expensive and says userland must accept DT_UNKNOWN, yet the visible
 * code fetches the target inode here.  No release of 'entrip' is visible
 * either.  Lines are missing from this listing, so this may be conditional
 * code -- confirm, and verify that the inode reference is dropped.
 */
516 if (!(entrip
= get_inode(fs_dev
, (ino_t
) dp
->mfs_d_ino
)))
517 panic("unexpected get_inode failure");
518 type
= IFTODT(entrip
->i_mode
);
521 /* MFS does not store file types in its directory entries, and
522 * fetching the mode from the inode is seriously expensive.
523 * Userland should always be prepared to receive DT_UNKNOWN.
525 r
= fsdriver_dentry_add(&fsdentry
, (ino_t
) dp
->mfs_d_ino
,
526 dp
->mfs_d_name
, len
, type
);
528 /* If the user buffer is full, or an error occurred, stop. */
532 /* Record the position of this entry, it is the
533 * starting point of the next request (unless the
534 * position is modified with lseek).
546 if (r
>= 0 && (r
= fsdriver_dentry_finish(&fsdentry
)) >= 0) {
548 if(!rip
->i_sp
->s_rd_only
) {
549 rip
->i_update
|= ATIME
;
554 put_inode(rip
); /* release the inode */