1 /* Created (MFS based):
2 * February 2010 (Evgeniy Ivanov)
 */
12 #include <sys/param.h>
13 #include <sys/dirent.h>
17 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
18 position
, unsigned bytes_ahead
);
19 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
20 size_t chunk
, unsigned left
, int call
, struct fsdriver_data
*data
,
21 unsigned buf_off
, unsigned int block_size
, int *completed
);
23 /*===========================================================================*
25 *===========================================================================*/
/* fs_readwrite: transfer 'nrbytes' bytes, starting at file offset 'position',
 * between the file whose inode number is 'ino_nr' and the buffer described by
 * 'data'.  'call' selects the operation; this function tests it against
 * FSC_READ and FSC_WRITE.  The request is cut into pieces that never cross a
 * block boundary and each piece is handed to rw_chunk().
 *
 * NOTE(review): the opening brace, some local declarations and the return
 * statements of this function are not visible in this listing, so the
 * return-value contract is deliberately not described here.
 */
26 ssize_t
fs_readwrite(ino_t ino_nr
, struct fsdriver_data
*data
, size_t nrbytes
,
27 off_t position
, int call
)
31 off_t f_size
, bytes_left
;
32 size_t off
, cum_io
, block_size
, chunk
;
39 /* Find the inode referred */
40 if ((rip
= find_inode(fs_dev
, ino_nr
)) == NULL
)
43 mode_word
= rip
->i_mode
& I_TYPE
;
44 regular
= (mode_word
== I_REGULAR
);
46 /* Determine blocksize */
47 block_size
= rip
->i_sp
->s_block_size
;
/* A negative f_size is replaced by MAX_FILE_POS before it is used as the
 * end-of-file limit for reads further down.
 */
49 if (f_size
< 0) f_size
= MAX_FILE_POS
;
51 if (call
== FSC_WRITE
) {
52 /* Check in advance to see if file will grow too big. */
/* The limit is tested as s_max_size - nrbytes instead of forming
 * position + nrbytes.
 */
53 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
58 /* Split the transfer into chunks that don't span two blocks. */
59 while (nrbytes
!= 0) {
60 off
= (unsigned int) (position
% block_size
);/* offset in blk*/
/* Start with everything from 'off' up to the end of the current block. */
61 chunk
= block_size
- off
;
65 if (call
== FSC_READ
) {
66 bytes_left
= f_size
- position
;
67 if (position
>= f_size
) break; /* we are beyond EOF */
68 if (chunk
> bytes_left
) chunk
= (int) bytes_left
;
71 /* Read or write 'chunk' bytes. */
/* In rw_chunk()'s parameter order, 'nrbytes' is passed as 'left' and
 * 'cum_io' (bytes transferred so far) as 'buf_off'.
 */
72 r
= rw_chunk(rip
, ((u64_t
)((unsigned long)position
)), off
, chunk
,
73 nrbytes
, call
, data
, cum_io
, block_size
, &completed
);
77 /* Update counters and pointers. */
78 nrbytes
-= chunk
; /* bytes yet to be read */
79 cum_io
+= chunk
; /* bytes read so far */
80 position
+= (off_t
) chunk
; /* position within the file */
83 /* On write, update file size and access time. */
84 if (call
== FSC_WRITE
) {
85 if (regular
|| mode_word
== I_DIRECTORY
) {
86 if (position
> f_size
) rip
->i_size
= position
;
/* rahead() raises its prefetch count to the minimum only while i_seek is
 * NO_SEEK.
 */
90 rip
->i_seek
= NO_SEEK
;
95 if (call
== FSC_READ
) rip
->i_update
|= ATIME
;
96 if (call
== FSC_WRITE
) rip
->i_update
|= CTIME
| MTIME
;
97 rip
->i_dirt
= IN_DIRTY
; /* inode is thus now dirty */
103 /*===========================================================================*
105 *===========================================================================*/
/* rw_chunk: handle one chunk of a transfer that lies within a single block of
 * the file 'rip'.  This is an old-style (K&R) definition; each parameter is
 * described next to its declaration below.
 *
 * Visible flow: the block number is looked up with read_map().  For a
 * nonexistent block the code zero-fills the user buffer (FSC_READ), calls
 * lmfs_zero_block_ino() (FSC_PEEK) or allocates a block with new_block()
 * (remaining case).  For an existing block, any call other than FSC_WRITE
 * goes through rahead() with 'left' as its bytes_ahead argument, while a
 * write obtains the buffer with lmfs_get_block_ino().  Data is then moved
 * with fsdriver_copyout() (FSC_READ) or fsdriver_copyin() (FSC_WRITE).
 *
 * NOTE(review): the test that selects the nonexistent-block path, several
 * branch bodies and the return statement are not visible in this listing,
 * and some block comments below have lost their closing line.
 */
106 static int rw_chunk(rip
, position
, off
, chunk
, left
, call
, data
, buf_off
,
107 block_size
, completed
)
108 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
109 u64_t position
; /* position within file to read or write */
110 unsigned off
; /* off within the current block */
111 size_t chunk
; /* number of bytes to read or write */
112 unsigned left
; /* max number of bytes wanted after position */
113 int call
; /* FSC_READ, FSC_WRITE, or FSC_PEEK */
114 struct fsdriver_data
*data
; /* structure for (remote) user buffer */
115 unsigned buf_off
; /* offset in user buffer */
116 unsigned int block_size
; /* block size of FS operating on */
117 int *completed
; /* number of bytes copied */
119 /* Read or write (part of) a block. */
121 struct buf
*bp
= NULL
;
126 ino_t ino
= VMC_NO_INODE
;
/* File offset of the start of the block that contains 'position'. */
127 u64_t ino_off
= rounddown(position
, block_size
);
/* Positions with a non-zero ex64hi() part are rejected with a panic; the
 * code below only ever uses ex64lo(position).
 */
131 if (ex64hi(position
) != 0)
132 panic("rw_chunk: position too high");
133 b
= read_map(rip
, (off_t
) ex64lo(position
), 0);
136 assert(ino
!= VMC_NO_INODE
);
139 if (call
== FSC_READ
) {
140 /* Reading from a nonexistent block. Must read as all zeros.*/
141 r
= fsdriver_zero(data
, buf_off
, chunk
);
143 printf("ext2fs: fsdriver_zero failed\n");
146 } else if (call
== FSC_PEEK
) {
147 /* Peeking a nonexistent block. Report to VM. */
148 lmfs_zero_block_ino(dev
, ino
, ino_off
);
151 /* Writing to a nonexistent block.
152 * Create and enter in inode.
154 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
157 } else if (call
!= FSC_WRITE
) {
158 /* Read and read ahead if convenient. */
159 bp
= rahead(rip
, b
, position
, left
);
161 /* Normally an existing block to be partially overwritten is first read
162 * in. However, a full block need not be read in. If it is already in
163 * the cache, acquire it, otherwise just acquire a free buffer.
165 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
166 if (off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
168 assert(ino
!= VMC_NO_INODE
);
169 assert(!(ino_off
% block_size
));
170 if ((r
= lmfs_get_block_ino(&bp
, dev
, b
, n
, ino
, ino_off
)) != OK
)
171 panic("ext2: error getting block (%llu,%u): %d", dev
, b
, r
);
174 /* In all cases, bp now points to a valid buffer. */
176 panic("bp not valid in rw_chunk, this can't happen");
/* Partial write that starts at offset 0 of a block lying at or beyond the
 * current file size; the body of this branch is not visible in this listing.
 */
178 if (call
== FSC_WRITE
&& chunk
!= block_size
&&
179 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
183 if (call
== FSC_READ
) {
184 /* Copy a chunk from the block buffer to user space. */
185 r
= fsdriver_copyout(data
, buf_off
, b_data(bp
)+off
, chunk
);
186 } else if (call
== FSC_WRITE
) {
187 /* Copy a chunk from user space to the block buffer. */
188 r
= fsdriver_copyin(data
, buf_off
, b_data(bp
)+off
, chunk
);
198 /*===========================================================================*
200 *===========================================================================*/
/* read_map: map a file position to a block number (old-style K&R
 * definition).
 *
 * Positions whose relative block number is below EXT2_NDIR_BLOCKS are
 * answered directly from rip->i_block[].  Otherwise the single, double or
 * triple indirect blocks are walked with get_block(), rd_indir() and
 * put_block().  NO_BLOCK is returned as soon as a pointer on the way is
 * NO_BLOCK.
 *
 * 'opportunistic' selects the I/O mode for the double and single indirect
 * blocks: PEEK when non-zero, NORMAL otherwise.  The "peeking failed" returns
 * below yield NO_BLOCK.  The triple indirect block is always requested with
 * NORMAL.
 *
 * NOTE(review): addr_in_block, addr_in_block2, doub_ind_s, triple_ind_s and
 * out_range_s are static and derived from rip->i_sp->s_block_size.  The
 * first_time flag suggests they are computed only once, but the guard is not
 * visible in this listing; if so, this assumes one block size for every
 * inode passed in.  Confirm against the full source.  The declaration of
 * 'opportunistic', the final return and some block-comment closing lines are
 * also not visible here.
 */
201 block_t
read_map(rip
, position
, opportunistic
)
202 register struct inode
*rip
; /* ptr to inode to map from */
203 off_t position
; /* position in file whose blk wanted */
206 /* Given an inode and a position within the corresponding file, locate the
207 * block number in which that position is to be found and return it.
213 unsigned long excess
, block_pos
;
214 static char first_time
= TRUE
;
215 static long addr_in_block
;
216 static long addr_in_block2
;
217 static long doub_ind_s
;
218 static long triple_ind_s
;
219 static long out_range_s
;
222 iomode
= opportunistic
? PEEK
: NORMAL
;
225 addr_in_block
= rip
->i_sp
->s_block_size
/ BLOCK_ADDRESS_BYTES
;
226 addr_in_block2
= addr_in_block
* addr_in_block
;
227 doub_ind_s
= EXT2_NDIR_BLOCKS
+ addr_in_block
;
228 triple_ind_s
= doub_ind_s
+ addr_in_block2
;
229 out_range_s
= triple_ind_s
+ addr_in_block2
* addr_in_block
;
233 block_pos
= position
/ rip
->i_sp
->s_block_size
; /* relative blk # in file */
235 /* Is 'position' to be found in the inode itself? */
236 if (block_pos
< EXT2_NDIR_BLOCKS
)
237 return(rip
->i_block
[block_pos
]);
239 /* It is not in the inode, so it must be single, double or triple indirect */
240 if (block_pos
< doub_ind_s
) {
241 b
= rip
->i_block
[EXT2_NDIR_BLOCKS
]; /* address of single indirect block */
242 mindex
= block_pos
- EXT2_NDIR_BLOCKS
;
243 } else if (block_pos
>= out_range_s
) { /* TODO: do we need it? */
246 /* double or triple indirect block. At first if it's triple,
247 * find double indirect block.
249 excess
= block_pos
- doub_ind_s
;
250 b
= rip
->i_block
[EXT2_DIND_BLOCK
];
251 if (block_pos
>= triple_ind_s
) {
252 b
= rip
->i_block
[EXT2_TIND_BLOCK
];
253 if (b
== NO_BLOCK
) return(NO_BLOCK
);
254 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get triple ind block */
255 excess
= block_pos
- triple_ind_s
;
/* Each entry of the triple indirect block covers addr_in_block2 file
 * blocks, hence the division here and the remainder taken further down.
 */
256 mindex
= excess
/ addr_in_block2
;
257 b
= rd_indir(bp
, mindex
); /* num of double ind block */
258 put_block(bp
); /* release triple ind block */
259 excess
= excess
% addr_in_block2
;
261 if (b
== NO_BLOCK
) return(NO_BLOCK
);
262 bp
= get_block(rip
->i_dev
, b
, iomode
); /* get double indirect block */
264 return NO_BLOCK
; /* peeking failed */
265 mindex
= excess
/ addr_in_block
;
266 b
= rd_indir(bp
, mindex
); /* num of single ind block */
267 put_block(bp
); /* release double ind block */
268 mindex
= excess
% addr_in_block
; /* index into single ind blk */
270 if (b
== NO_BLOCK
) return(NO_BLOCK
);
271 bp
= get_block(rip
->i_dev
, b
, iomode
); /* get single indirect block */
273 return NO_BLOCK
; /* peeking failed */
275 b
= rd_indir(bp
, mindex
);
276 put_block(bp
); /* release single ind block */
/* get_block_map: look up the block that holds 'position' in file 'rip' with
 * read_map() (non-opportunistic) and request it from the cache with
 * lmfs_get_block_ino() in NORMAL mode, tagged with the inode number
 * rip->i_num.  A failing request ends in panic().
 *
 * NOTE(review): the remaining arguments of the lmfs_get_block_ino() call, any
 * handling of a NO_BLOCK result and the return statement are not visible in
 * this listing.
 */
281 struct buf
*get_block_map(register struct inode
*rip
, u64_t position
)
285 block_t b
= read_map(rip
, position
, 0); /* get block number */
288 block_size
= get_block_size(rip
->i_dev
);
/* Align 'position' down to the start of its block. */
289 position
= rounddown(position
, block_size
);
290 assert(rip
->i_num
!= VMC_NO_INODE
);
291 if ((r
= lmfs_get_block_ino(&bp
, rip
->i_dev
, b
, NORMAL
, rip
->i_num
,
293 panic("ext2: error getting block (%llu,%u): %d",
298 /*===========================================================================*
300 *===========================================================================*/
/* rd_indir: return entry 'mindex' of the indirect block held in buffer 'bp',
 * passed through conv4(le_CPU, ...) (presumably a byte-order conversion of
 * the 32-bit on-disk value; confirm against conv4's definition).
 * Old-style (K&R) definition.
 *
 * NOTE(review): the condition guarding the panic() below is not visible in
 * this listing; the message indicates it fires for a NULL 'bp'.  'mindex' is
 * not range-checked in the visible code.
 */
301 block_t
rd_indir(bp
, mindex
)
302 struct buf
*bp
; /* pointer to indirect block */
303 int mindex
; /* index into *bp */
306 panic("rd_indir() on NULL");
307 /* TODO: use conv call */
308 return conv4(le_CPU
, b_ind(bp
)[mindex
]);
312 /*===========================================================================*
314 *===========================================================================*/
/* rahead: obtain the buffer for block 'baseblock' of file 'rip' and, when it
 * has to be read, queue further blocks of the file in read_q[] for
 * lmfs_prefetch().  Old-style (K&R) definition; the parameters are described
 * next to their declarations below.
 *
 * Visible flow: the position is aligned to a block boundary, the block is
 * first requested with PEEK, the number of blocks to prefetch is bounded by
 * BLOCKS_MINIMUM (only while rip->i_seek is NO_SEEK), by the blocks left in
 * the file and by LMFS_MAX_PREFETCH, following blocks are located with
 * read_map() in opportunistic mode, and finally 'baseblock' is requested with
 * NORMAL.  read_q[] is a static array of LMFS_MAX_PREFETCH entries.
 *
 * NOTE(review): the loop header, several conditions, the assignments of
 * 'dev' and 'block' and the return statement are not visible in this listing,
 * and some block comments below have lost their closing line.
 */
315 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
316 register struct inode
*rip
; /* pointer to inode for file to be read */
317 block_t baseblock
; /* block at current position */
318 u64_t position
; /* position within file */
319 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
321 /* Fetch a block from the cache or the device. If a physical read is
322 * required, prefetch as many more blocks as convenient into the cache.
323 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
324 * The device driver may decide it knows better and stop reading at a
325 * cylinder boundary (or after an error). Rw_scattered() puts an optional
326 * flag on all reads to allow this.
328 /* Minimum number of blocks to prefetch. */
329 # define BLOCKS_MINIMUM 32
331 unsigned int blocks_ahead
, fragment
, block_size
;
332 block_t block
, blocks_left
;
335 struct buf
*bp
= NULL
;
336 static block64_t read_q
[LMFS_MAX_PREFETCH
];
337 u64_t position_running
;
340 assert(dev
!= NO_DEV
);
341 block_size
= get_block_size(dev
);
/* Move 'position' back to the start of its block and count the skipped
 * bytes as part of the read-ahead request.
 */
345 fragment
= position
% block_size
;
346 position
-= fragment
;
347 position_running
= position
;
348 bytes_ahead
+= fragment
;
349 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
351 r
= lmfs_get_block_ino(&bp
, dev
, block
, PEEK
, rip
->i_num
, position
);
355 panic("ext2: error getting block (%llu,%u): %d", dev
, block
, r
);
357 /* The best guess for the number of blocks to prefetch: A lot.
358 * It is impossible to tell what the device looks like, so we don't even
359 * try to guess the geometry, but leave it to the driver.
361 * The floppy driver can read a full track with no rotational delay, and it
362 * avoids reading partial tracks if it can, so handing it enough buffers to
363 * read two tracks is perfect. (Two, because some diskette types have
364 * an odd number of sectors per track, so a block may span tracks.)
366 * The disk drivers don't try to be smart. With todays disks it is
367 * impossible to tell what the real geometry looks like, so it is best to
368 * read as much as you can. With luck the caching on the drive allows
369 * for a little time to start the next read.
371 * The current solution below is a bit of a hack, it just reads blocks from
372 * the current file position hoping that more of the file can be found. A
373 * better solution must look at the already available
374 * indirect blocks (but don't call read_map!).
377 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
380 /* Go for the first indirect block if we are in its neighborhood. */
381 ind1_pos
= (EXT2_NDIR_BLOCKS
) * block_size
;
382 if ((off_t
) ex64lo(position
) <= ind1_pos
&& rip
->i_size
> ind1_pos
) {
387 /* Read at least the minimum number of blocks, but not after a seek. */
388 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
389 blocks_ahead
= BLOCKS_MINIMUM
;
391 /* Can't go past end of file. */
392 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
394 /* No more than the maximum request. */
395 if (blocks_ahead
> LMFS_MAX_PREFETCH
) blocks_ahead
= LMFS_MAX_PREFETCH
;
399 /* Acquire block buffers. */
402 read_q
[read_q_size
++] = block
;
404 if (--blocks_ahead
== 0) break;
407 position_running
+= block_size
;
409 thisblock
= read_map(rip
, (off_t
) ex64lo(position_running
), 1);
410 if (thisblock
!= NO_BLOCK
) {
411 r
= lmfs_get_block_ino(&bp
, dev
, thisblock
, PEEK
, rip
->i_num
,
415 r
= lmfs_get_block(&bp
, dev
, block
, PEEK
);
418 /* Oops, block already in the cache, get out. */
423 panic("ext2: error getting block (%llu,%u): %d", dev
, block
,
426 lmfs_prefetch(dev
, read_q
, read_q_size
);
/* After the prefetch request, fetch the base block itself in NORMAL mode. */
428 r
= lmfs_get_block_ino(&bp
, dev
, baseblock
, NORMAL
, rip
->i_num
, position
);
430 panic("ext2: error getting block (%llu,%u): %d", dev
, baseblock
, r
);
435 /*===========================================================================*
437 *===========================================================================*/
/* get_dtype: translate the d_file_type field of directory entry 'dp' from
 * the EXT2_FT_ constants to the matching DT_ constants; any other value
 * yields DT_UNKNOWN.
 *
 * The function first tests whether the superblock lacks INCOMPAT_FILETYPE.
 * NOTE(review): the statement executed in that case and the closing braces
 * are not visible in this listing.
 */
438 static unsigned int get_dtype(struct ext2_disk_dir_desc
*dp
)
440 /* Return the type of the file identified by the given directory entry. */
442 if (!HAS_INCOMPAT_FEATURE(superblock
, INCOMPAT_FILETYPE
))
445 switch (dp
->d_file_type
) {
446 case EXT2_FT_REG_FILE
: return DT_REG
;
447 case EXT2_FT_DIR
: return DT_DIR
;
448 case EXT2_FT_SYMLINK
: return DT_LNK
;
449 case EXT2_FT_BLKDEV
: return DT_BLK
;
450 case EXT2_FT_CHRDEV
: return DT_CHR
;
451 case EXT2_FT_FIFO
: return DT_FIFO
;
452 default: return DT_UNKNOWN
;
456 /*===========================================================================*
458 *===========================================================================*/
/* fs_getdents: copy entries of the directory with inode number 'ino_nr' into
 * the caller's buffer 'data' (size 'bytes') through the fsdriver_dentry
 * helpers (init, add, finish).
 *
 * Visible flow: the start position must be a multiple of DIR_ENTRY_ALIGN; the
 * inode is obtained with get_inode(); directory blocks are fetched one at a
 * time with get_block_map(), starting at the block containing 'pos'; inside
 * the first block the code skips forward to the entry at 'pos'; entries with
 * d_ino == 0 are ignored; every other entry is handed to
 * fsdriver_dentry_add() with its inode number, name, name length and
 * get_dtype() result.  When the request finishes successfully the inode gets
 * ATIME and is marked IN_DIRTY, and put_inode() releases it.
 *
 * NOTE(review): the rest of the parameter list, several local declarations,
 * the error paths and the return statement are not visible in this listing,
 * and some block comments below have lost their closing line.
 */
459 ssize_t
fs_getdents(ino_t ino_nr
, struct fsdriver_data
*data
, size_t bytes
,
462 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
463 #define GETDENTS_ENTRIES 8
/* Static staging buffer handed to fsdriver_dentry_init(): GETDENTS_ENTRIES
 * slots of GETDENTS_BUFSIZE bytes each.
 */
464 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
465 struct fsdriver_dentry fsdentry
;
468 unsigned int block_size
, len
;
469 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
471 struct ext2_disk_dir_desc
*d_desc
;
474 /* Check whether the position is properly aligned */
476 if ((unsigned int) pos
% DIR_ENTRY_ALIGN
)
479 if ((rip
= get_inode(fs_dev
, ino_nr
)) == NULL
)
482 block_size
= rip
->i_sp
->s_block_size
;
483 off
= (pos
% block_size
); /* Offset in block */
484 block_pos
= pos
- off
;
485 done
= FALSE
; /* Stop processing directory blocks when done is set */
487 fsdriver_dentry_init(&fsdentry
, data
, bytes
, getdents_buf
,
488 sizeof(getdents_buf
));
490 /* The default position for the next request is EOF. If the user's buffer
491 * fills up before EOF, new_pos will be modified. */
492 new_pos
= rip
->i_size
;
496 for (; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
497 off_t temp_pos
= block_pos
;
498 /* Since directories don't have holes, 'bp' cannot be NULL. */
499 bp
= get_block_map(rip
, block_pos
); /* get a dir block */
503 /* Search a directory block. */
504 d_desc
= (struct ext2_disk_dir_desc
*) &b_data(bp
);
506 /* we need to seek to entry at off bytes.
507 * when NEXT_DISC_DIR_POS == block_size it's last dentry.
509 for (; temp_pos
+ conv2(le_CPU
, d_desc
->d_rec_len
) <= pos
510 && NEXT_DISC_DIR_POS(d_desc
, &b_data(bp
)) < block_size
;
511 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
512 temp_pos
+= conv2(le_CPU
, d_desc
->d_rec_len
);
515 for (; CUR_DISC_DIR_POS(d_desc
, &b_data(bp
)) < block_size
;
516 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
517 if (d_desc
->d_ino
== 0)
518 continue; /* Entry is not in use */
/* d_name_len is read without conv2 or conv4, unlike d_rec_len and d_ino. */
520 len
= d_desc
->d_name_len
;
521 assert(len
<= NAME_MAX
);
522 assert(len
<= EXT2_NAME_MAX
);
524 /* Need the position of this entry in the directory */
525 ent_pos
= block_pos
+ ((char *)d_desc
- b_data(bp
));
527 child_nr
= (ino_t
) conv4(le_CPU
, d_desc
->d_ino
);
528 r
= fsdriver_dentry_add(&fsdentry
, child_nr
, d_desc
->d_name
,
529 len
, get_dtype(d_desc
));
531 /* If the user buffer is full, or an error occurred, stop. */
535 /* Record the position of this entry, it is the
536 * starting point of the next request (unless the
537 * position is modified with lseek).
549 if (r
>= 0 && (r
= fsdriver_dentry_finish(&fsdentry
)) >= 0) {
551 rip
->i_update
|= ATIME
;
552 rip
->i_dirt
= IN_DIRTY
;
555 put_inode(rip
); /* release the inode */