1 /* $NetBSD: udf_strat_sequential.c,v 1.9 2009/02/10 17:48:19 reinoud Exp $ */
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.9 2009/02/10 17:48:19 reinoud Exp $");
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
66 #include "udf_bswap.h"
/*
 * VTOI: cast a vnode's v_data to its struct udf_node.
 * NOTE(review): the macro argument is not parenthesized in the expansion;
 * only pass a plain identifier.
 */
69 #define VTOI(vnode) ((struct udf_node *) vnode->v_data)
/*
 * PRIV: cast a mount's strategy_private to this strategy's
 * struct strat_private (same caveat about the unparenthesized argument).
 */
70 #define PRIV(ump) ((struct strat_private *) ump->strategy_private)
72 /* --------------------------------------------------------------------- */
/*
 * Queue indices used to index strat_private.queues[] and
 * strat_private.last_queued[]; UDF_SHED_MAX is the size of both arrays.
 */
75 #define UDF_SHED_MAX 3
/* reads */
77 #define UDF_SHED_READING 0
/* writes whose buffer type is UDF_C_ABSOLUTE */
78 #define UDF_SHED_WRITING 1
/* all other writes */
79 #define UDF_SHED_SEQWRITING 2
/*
 * Per-mount private state of the sequential strategy, reached through
 * PRIV(ump).
 * NOTE(review): priv->cur_queue and priv->queue_lwp are used elsewhere in
 * this file, but their declarations are not visible in this listing.
 */
81 struct strat_private
{
82 struct pool desc_pool
; /* node descriptors */
85 kcondvar_t discstrat_cv
; /* to wait on */
86 kmutex_t discstrat_mutex
; /* disc strategy */
88 int run_thread
; /* thread control */
/* device strategy fetched with DIOCGSTRATEGY at init, set back at finish */
91 struct disk_strategy old_strategy_setting
;
/* one buffer queue per UDF_SHED_* index */
92 struct bufq_state
*queues
[UDF_SHED_MAX
];
/* per queue: timestamp taken by vfs_timestamp() when a buffer was queued */
93 struct timespec last_queued
[UDF_SHED_MAX
];
97 /* --------------------------------------------------------------------- */
/*
 * Completion callback for a node descriptor write; it is the callback that
 * udf_write_logvol_dscr_seq() hands to udf_write_phys_dscr_async().
 *
 * Drops one count from the node's outstanding_nodedscr; when that reaches
 * zero the node is unlocked and sleepers on &outstanding_nodedscr are woken.
 * Finally the hold on the vnode is released.
 *
 * NOTE(review): several lines of this function are missing from this
 * listing (e.g. what follows the NULL-node printf and the condition that
 * guards re-marking the node dirty).
 */
100 udf_wr_nodedscr_callback(struct buf
*buf
)
102 struct udf_node
*udf_node
;
105 KASSERT(buf
->b_data
);
107 /* called when write action is done */
108 DPRINTF(WRITE
, ("udf_wr_nodedscr_callback(): node written out\n"));
/* the buffer's vnode identifies the node whose descriptor was written */
110 udf_node
= VTOI(buf
->b_vp
);
111 if (udf_node
== NULL
) {
113 printf("udf_wr_node_callback: NULL node?\n");
117 /* XXX right flags to mark dirty again on error? */
119 udf_node
->i_flags
|= IN_MODIFIED
| IN_ACCESSED
;
120 /* XXX TODO reschedule on error */
123 /* decrement outstanding_nodedscr */
124 KASSERT(udf_node
->outstanding_nodedscr
>= 1);
125 udf_node
->outstanding_nodedscr
--;
126 if (udf_node
->outstanding_nodedscr
== 0) {
127 /* first unlock the node */
128 UDF_UNLOCK_NODE(udf_node
, 0);
129 wakeup(&udf_node
->outstanding_nodedscr
);
132 /* unreference the vnode so it can be recycled */
133 holdrele(udf_node
->vnode
);
138 /* --------------------------------------------------------------------- */
/*
 * Allocate a fresh, zeroed descriptor buffer for a node.
 *
 * The buffer is taken from the mount's descriptor pool with PR_WAITOK,
 * stored in args->dscr, and its first lb_size bytes (the logical volume's
 * logical block size) are cleared.
 *
 * NOTE(review): the declaration of lb_size and the return statement are not
 * visible in this listing.
 */
141 udf_create_logvol_dscr_seq(struct udf_strat_args
*args
)
/* dscrptr aliases args->dscr so the result is handed back through args */
143 union dscrptr
**dscrptr
= &args
->dscr
;
144 struct udf_mount
*ump
= args
->ump
;
145 struct strat_private
*priv
= PRIV(ump
);
148 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
149 *dscrptr
= pool_get(&priv
->desc_pool
, PR_WAITOK
);
150 memset(*dscrptr
, 0, lb_size
);
/*
 * Give the descriptor buffer args->dscr back to the mount's descriptor
 * pool (the pool that the create/read routines of this strategy take
 * their buffers from).
 */
157 udf_free_logvol_dscr_seq(struct udf_strat_args
*args
)
159 union dscrptr
*dscr
= args
->dscr
;
160 struct udf_mount
*ump
= args
->ump
;
161 struct strat_private
*priv
= PRIV(ump
);
163 pool_put(&priv
->desc_pool
, dscr
);
/*
 * Read the descriptor addressed by args->icb into a pool buffer.
 *
 * The ICB is translated to a physical sector with udf_translate_vtop(), the
 * descriptor is read into a temporary M_UDFTEMP allocation with
 * udf_read_phys_dscr(), and lb_size bytes of it are then copied into a
 * buffer taken from the mount's descriptor pool, which is returned through
 * args->dscr. The temporary allocation is freed afterwards.
 *
 * NOTE(review): the declarations of lb_size/error, the error checks after
 * the two calls and the return statement are not visible in this listing.
 */
168 udf_read_logvol_dscr_seq(struct udf_strat_args
*args
)
170 union dscrptr
**dscrptr
= &args
->dscr
;
171 union dscrptr
*tmpdscr
;
172 struct udf_mount
*ump
= args
->ump
;
173 struct long_ad
*icb
= args
->icb
;
174 struct strat_private
*priv
= PRIV(ump
);
176 uint32_t sector
, dummy
;
179 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
/* translate the logical address to a physical sector; dummy is not used */
181 error
= udf_translate_vtop(ump
, icb
, &sector
, &dummy
);
185 /* try to read in fe/efe */
186 error
= udf_read_phys_dscr(ump
, sector
, M_UDFTEMP
, &tmpdscr
);
/* move the descriptor into a pool buffer so it can be pool_put() later */
190 *dscrptr
= pool_get(&priv
->desc_pool
, PR_WAITOK
);
191 memcpy(*dscrptr
, tmpdscr
, lb_size
);
192 free(tmpdscr
, M_UDFTEMP
);
/*
 * Write out the node descriptor args->dscr for args->udf_node.
 *
 * logsectornr is the logical block number from args->icb. When the node's
 * partition is not of type UDF_VTOP_TYPE_VIRT, the ICB is translated to a
 * physical sector number with udf_translate_vtop(). A hold is taken on the
 * vnode before the write. The descriptor is written either with
 * udf_write_phys_dscr_sync() or with udf_write_phys_dscr_async(); the async
 * variant completes through udf_wr_nodedscr_callback().
 *
 * NOTE(review): the declarations of vpart/error, the test on waitfor that
 * selects sync vs. async, and the conditions around the trailing
 * holdrele()/outstanding_nodedscr handling are not visible in this listing.
 */
199 udf_write_logvol_dscr_seq(struct udf_strat_args
*args
)
201 union dscrptr
*dscr
= args
->dscr
;
202 struct udf_mount
*ump
= args
->ump
;
203 struct udf_node
*udf_node
= args
->udf_node
;
204 struct long_ad
*icb
= args
->icb
;
205 int waitfor
= args
->waitfor
;
206 uint32_t logsectornr
, sectornr
, dummy
;
210 * we have to decide if we write it out sequential or at its fixed
211 * position by examining the partition it is (to be) written on.
213 vpart
= udf_rw16(udf_node
->loc
.loc
.part_num
);
214 logsectornr
= udf_rw32(icb
->loc
.lb_num
);
216 if (ump
->vtop_tp
[vpart
] != UDF_VTOP_TYPE_VIRT
) {
/* fixed position: resolve the physical sector now; dummy is not used */
217 error
= udf_translate_vtop(ump
, icb
, &sectornr
, &dummy
);
222 /* add reference to the vnode to prevent recycling */
223 vhold(udf_node
->vnode
);
226 DPRINTF(WRITE
, ("udf_write_logvol_dscr: sync write\n"));
228 error
= udf_write_phys_dscr_sync(ump
, udf_node
, UDF_C_NODE
,
229 dscr
, sectornr
, logsectornr
);
231 DPRINTF(WRITE
, ("udf_write_logvol_dscr: no wait, async write\n"));
233 error
= udf_write_phys_dscr_async(ump
, udf_node
, UDF_C_NODE
,
234 dscr
, sectornr
, logsectornr
, udf_wr_nodedscr_callback
);
235 /* will be UNLOCKED in call back */
/* same release / count-down / unlock / wakeup steps as the callback does */
239 holdrele(udf_node
->vnode
);
241 udf_node
->outstanding_nodedscr
--;
242 if (udf_node
->outstanding_nodedscr
== 0) {
243 UDF_UNLOCK_NODE(udf_node
, 0);
244 wakeup(&udf_node
->outstanding_nodedscr
);
250 /* --------------------------------------------------------------------- */
253 * Main file-system specific scheduler. Due to the nature of optical media
254 * scheduling can't be performed in the traditional way. Most OS
255 * implementations I've seen thus read or write a file atomically giving all
256 * kinds of side effects.
258 * This implementation uses a kernel thread to schedule the queued requests in
259 * such a way that is semi-optimal for optical media; this means approximately
260 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
/*
 * Queue args->nestbuf on one of the strategy's own buffer queues and signal
 * discstrat_cv.
 *
 * Queue selection:
 *   - buffers with B_READ set          -> UDF_SHED_READING
 *   - writes of type UDF_C_ABSOLUTE    -> UDF_SHED_WRITING
 *   - all other writes                 -> UDF_SHED_SEQWRITING
 *
 * The buffer must be a nested I/O buffer (b_iodone == nestiobuf_iodone).
 * The queue insert and the last_queued timestamp update are done under
 * discstrat_mutex.
 *
 * NOTE(review): the declarations of what/queue are not visible in this
 * listing.
 */
265 udf_queuebuf_seq(struct udf_strat_args
*args
)
267 struct udf_mount
*ump
= args
->ump
;
268 struct buf
*nestbuf
= args
->nestbuf
;
269 struct strat_private
*priv
= PRIV(ump
);
275 KASSERT(nestbuf
->b_iodone
== nestiobuf_iodone
);
277 what
= nestbuf
->b_udf_c_type
;
/* default to the read queue; overridden below for writes */
278 queue
= UDF_SHED_READING
;
279 if ((nestbuf
->b_flags
& B_READ
) == 0) {
281 queue
= UDF_SHED_SEQWRITING
;
282 if (what
== UDF_C_ABSOLUTE
)
283 queue
= UDF_SHED_WRITING
;
286 /* use our own scheduler lists for more complex scheduling */
287 mutex_enter(&priv
->discstrat_mutex
);
288 bufq_put(priv
->queues
[queue
], nestbuf
);
289 vfs_timestamp(&priv
->last_queued
[queue
]);
290 mutex_exit(&priv
->discstrat_mutex
);
292 /* signal our thread that there might be something to do */
293 cv_signal(&priv
->discstrat_cv
);
296 /* --------------------------------------------------------------------- */
298 /* TODO convert to lb_size */
/*
 * Record in the VAT where a node descriptor buffer is being written.
 *
 * ump    - mount; must have a vat_node and use UDF_ALLOC_VAT allocation on
 *          its node partition (both asserted).
 * buf    - buffer holding the descriptor (b_data) for the node at b_vp.
 * lb_map - new location to record; it is converted with udf_rw32() before
 *          being stored.
 *
 * Only buffers of type UDF_C_NODE are of interest. If the node is the VAT
 * node itself, only the descriptor's tag location is set to the new
 * location and the tag checksum is revalidated. Otherwise the 4-byte entry
 * at vat_offset + lb_num * 4 of the VAT file is overwritten, under
 * allocate_mutex, where lb_num is the descriptor's current tag location.
 *
 * NOTE(review): the statements following the c_type test and the VAT-node
 * branch (presumably early returns), the declarations of lb_num/error and
 * the condition guarding the panic are not visible in this listing.
 */
300 udf_VAT_mapping_update(struct udf_mount
*ump
, struct buf
*buf
, uint32_t lb_map
)
302 union dscrptr
*fdscr
= (union dscrptr
*) buf
->b_data
;
303 struct vnode
*vp
= buf
->b_vp
;
304 struct udf_node
*udf_node
= VTOI(vp
);
305 uint32_t lb_size
, blks
;
307 uint32_t udf_rw32_lbmap
;
308 int c_type
= buf
->b_udf_c_type
;
311 /* only interested when we're using a VAT */
312 KASSERT(ump
->vat_node
);
313 KASSERT(ump
->vtop_alloc
[ump
->node_part
] == UDF_ALLOC_VAT
);
315 /* only nodes are recorded in the VAT */
316 /* NOTE: and the fileset descriptor (FIXME ?) */
317 if (c_type
!= UDF_C_NODE
)
320 /* we now have an UDF FE/EFE node on media with VAT (or VAT itself) */
321 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
322 blks
= lb_size
/ DEV_BSIZE
;
/* the value as it is stored on disc (passed through udf_rw32) */
324 udf_rw32_lbmap
= udf_rw32(lb_map
);
326 /* if we're the VAT itself, only update our assigned sector number */
327 if (udf_node
== ump
->vat_node
) {
328 fdscr
->tag
.tag_loc
= udf_rw32_lbmap
;
329 udf_validate_tag_sum(fdscr
);
330 DPRINTF(TRANSLATE
, ("VAT assigned to sector %u\n",
331 udf_rw32(udf_rw32_lbmap
)));
332 /* no use mapping the VAT node in the VAT */
336 /* record new position in VAT file */
337 lb_num
= udf_rw32(fdscr
->tag
.tag_loc
);
339 /* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
341 DPRINTF(TRANSLATE
, ("VAT entry change (log %u -> phys %u)\n",
344 /* VAT should be longer than this write, can't go wrong */
345 KASSERT(lb_num
<= ump
->vat_entries
);
/* each VAT entry is 4 bytes wide, hence the "* 4" in the offset */
347 mutex_enter(&ump
->allocate_mutex
);
348 error
= udf_vat_write(ump
->vat_node
,
349 (uint8_t *) &udf_rw32_lbmap
, 4,
350 ump
->vat_offset
+ lb_num
* 4);
351 mutex_exit(&ump
->allocate_mutex
);
354 panic( "udf_VAT_mapping_update: HELP! i couldn't "
355 "write in the VAT file ?\n");
/*
 * Hand one buffer taken from queue `queue' to the device via
 * VOP_STRATEGY(ump->devvp, buf).
 *
 *   UDF_SHED_READING    - passed to the device unchanged.
 *   UDF_SHED_WRITING    - must be of type UDF_C_ABSOLUTE; passed unchanged.
 *   UDF_SHED_SEQWRITING - the buffer is allocated late with
 *       udf_late_allocate_buf(), b_blkno is set to the physical block
 *       number, and depending on b_udf_c_type the contents are fixed up
 *       (floating descriptors, nodes + VAT mapping, FID blocks) before the
 *       buffer is passed to the device.
 *
 * blks is the number of DEV_BSIZE blocks per sector and converts between
 * sector numbers and b_blkno units.
 *
 * NOTE(review): many lines are missing from this listing (several local
 * declarations, the returns after the READ and WRITE cases, comment
 * delimiters, and the loop around the FID fixup), so the exact control flow
 * cannot be confirmed from here.
 */
360 udf_issue_buf(struct udf_mount
*ump
, int queue
, struct buf
*buf
)
363 struct long_ad
*node_ad_cpy
;
364 struct part_desc
*pdesc
;
365 uint64_t *lmapping
, *lmappos
, blknr
;
366 uint32_t our_sectornr
, sectornr
, bpos
;
370 int sector_size
= ump
->discinfo
.sector_size
;
371 int blks
= sector_size
/ DEV_BSIZE
;
374 /* if reading, just pass to the device's STRATEGY */
375 if (queue
== UDF_SHED_READING
) {
376 DPRINTF(SHEDULE
, ("\nudf_issue_buf READ %p : sector %d type %d,"
377 "b_resid %d, b_bcount %d, b_bufsize %d\n",
378 buf
, (uint32_t) buf
->b_blkno
/ blks
, buf
->b_udf_c_type
,
379 buf
->b_resid
, buf
->b_bcount
, buf
->b_bufsize
));
380 VOP_STRATEGY(ump
->devvp
, buf
);
384 blknr
= buf
->b_blkno
;
385 our_sectornr
= blknr
/ blks
;
/* absolute writes already carry their final block number */
387 if (queue
== UDF_SHED_WRITING
) {
388 DPRINTF(SHEDULE
, ("\nudf_issue_buf WRITE %p : sector %d "
389 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
390 buf
, (uint32_t) buf
->b_blkno
/ blks
, buf
->b_udf_c_type
,
391 buf
->b_resid
, buf
->b_bcount
, buf
->b_bufsize
));
392 KASSERT(buf
->b_udf_c_type
== UDF_C_ABSOLUTE
);
394 // udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
395 VOP_STRATEGY(ump
->devvp
, buf
);
399 KASSERT(queue
== UDF_SHED_SEQWRITING
);
400 DPRINTF(SHEDULE
, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
401 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
402 buf
, buf
->b_udf_c_type
, buf
->b_resid
, buf
->b_bcount
,
406 * Buffers should not have been allocated to disc addresses yet on
407 * this queue. Note that a buffer can get multiple extents allocated.
409 * lmapping contains lb_num relative to base partition.
/* scratch arrays owned by the mount, filled by udf_late_allocate_buf() */
411 lmapping
= ump
->la_lmapping
;
412 node_ad_cpy
= ump
->la_node_ad_cpy
;
414 /* logically allocate buf and map it in the file */
415 udf_late_allocate_buf(ump
, buf
, lmapping
, node_ad_cpy
, &vpart_num
);
418 * NOTE We are using the knowledge here that sequential media will
419 * always be mapped linearly. Thus no use to explicitly translate the
423 /* calculate offset from physical base partition */
424 pdesc
= ump
->partitions
[ump
->vtop
[vpart_num
]];
425 ptov
= udf_rw32(pdesc
->start_loc
);
427 /* set buffers blkno to the physical block number */
428 buf
->b_blkno
= (*lmapping
+ ptov
) * blks
;
430 /* fixate floating descriptors */
431 if (buf
->b_udf_c_type
== UDF_C_FLOAT_DSCR
) {
432 /* set our tag location to the absolute position */
433 dscr
= (union dscrptr
*) buf
->b_data
;
434 dscr
->tag
.tag_loc
= udf_rw32(*lmapping
+ ptov
);
435 udf_validate_tag_and_crc_sums(dscr
);
438 /* update mapping in the VAT */
439 if (buf
->b_udf_c_type
== UDF_C_NODE
) {
440 udf_VAT_mapping_update(ump
, buf
, *lmapping
);
441 udf_fixup_node_internals(ump
, buf
->b_data
, buf
->b_udf_c_type
);
444 /* if we have FIDs, fixup using the new allocation table */
445 if (buf
->b_udf_c_type
== UDF_C_FIDS
) {
446 buf_len
= buf
->b_bcount
;
/* one mapping entry is consumed per sector-sized piece of the buffer */
450 sectornr
= *lmappos
++;
451 len
= MIN(buf_len
, sector_size
);
452 fidblk
= (uint8_t *) buf
->b_data
+ bpos
;
453 udf_fixup_fid_block(fidblk
, sector_size
,
460 VOP_STRATEGY(ump
->devvp
, buf
);
/*
 * One scheduling step for the strategy thread.
 *
 * Takes a buffer from the currently selected queue, drops discstrat_mutex
 * while it is issued through udf_issue_buf(), re-takes the mutex and then
 * calls the buffer's original b_iodone callback (it is cleared, together
 * with B_ASYNC, for the duration of the I/O). A write that comes back with
 * b_error set panics.
 *
 * The remainder decides whether to switch queues: any non-empty queue
 * becomes the candidate, with UDF_SHED_SEQWRITING checked last and
 * therefore winning over UDF_SHED_WRITING and UDF_SHED_READING. When going
 * from reading to sequential writing the track info is refreshed with
 * udf_update_trackinfo() with the mutex dropped.
 *
 * The mutex_exit()/mutex_enter() pairing shows the function runs with
 * discstrat_mutex held by the caller.
 *
 * NOTE(review): many lines are missing from this listing (declarations of
 * buf/new_queue/error, the test on whether a buffer was obtained, the wait
 * for I/O completion, how `now' is obtained, and the statement under the
 * "< 3" second test), so the exact control flow cannot be confirmed here.
 */
465 udf_doshedule(struct udf_mount
*ump
)
468 struct timespec now
, *last
;
469 struct strat_private
*priv
= PRIV(ump
);
470 void (*b_callback
)(struct buf
*);
474 buf
= bufq_get(priv
->queues
[priv
->cur_queue
]);
476 /* transfer from the current queue to the device queue */
477 mutex_exit(&priv
->discstrat_mutex
);
479 /* transform buffer to synchronous; XXX needed? */
480 b_callback
= buf
->b_iodone
;
481 buf
->b_iodone
= NULL
;
482 CLR(buf
->b_flags
, B_ASYNC
);
484 /* issue and wait on completion */
485 udf_issue_buf(ump
, priv
->cur_queue
, buf
);
488 mutex_enter(&priv
->discstrat_mutex
);
490 /* if there is an error, repair this error, otherwise propagate */
491 if (buf
->b_error
&& ((buf
->b_flags
& B_READ
) == 0)) {
492 /* check what we need to do */
493 panic("UDF write error, can't handle yet!\n");
496 /* propagate result to higher layers */
498 buf
->b_iodone
= b_callback
;
499 (*buf
->b_iodone
)(buf
);
505 /* Check if we're idling in this state */
507 last
= &priv
->last_queued
[priv
->cur_queue
];
508 if (ump
->discinfo
.mmc_class
== MMC_CLASS_CD
) {
509 /* don't switch too fast for CD media; it's expensive in time */
510 if (now
.tv_sec
- last
->tv_sec
< 3)
514 /* check if we can/should switch */
515 new_queue
= priv
->cur_queue
;
/* later tests override earlier ones: SEQWRITING > WRITING > READING */
517 if (bufq_peek(priv
->queues
[UDF_SHED_READING
]))
518 new_queue
= UDF_SHED_READING
;
519 if (bufq_peek(priv
->queues
[UDF_SHED_WRITING
])) /* only for unmount */
520 new_queue
= UDF_SHED_WRITING
;
521 if (bufq_peek(priv
->queues
[UDF_SHED_SEQWRITING
]))
522 new_queue
= UDF_SHED_SEQWRITING
;
523 if (priv
->cur_queue
== UDF_SHED_READING
) {
524 if (new_queue
== UDF_SHED_SEQWRITING
) {
525 /* TODO use flag to signal if this is needed */
526 mutex_exit(&priv
->discstrat_mutex
);
528 /* update trackinfo for data and metadata */
529 error
= udf_update_trackinfo(ump
,
532 error
= udf_update_trackinfo(ump
,
533 &ump
->metadata_track
);
535 mutex_enter(&priv
->discstrat_mutex
);
539 if (new_queue
!= priv
->cur_queue
) {
540 DPRINTF(SHEDULE
, ("switching from %d to %d\n",
541 priv
->cur_queue
, new_queue
));
544 priv
->cur_queue
= new_queue
;
/*
 * Body of the strategy kernel thread; arg is the struct udf_mount.
 *
 * With discstrat_mutex held it loops for as long as run_thread is set or
 * the queues were not all empty on the previous pass. Each pass recomputes
 * `empty' by peeking at the three queues and waits on discstrat_cv with a
 * timeout of hz/8 ticks. After the loop the mutex is released and
 * wakeup(&priv->run_thread) is issued, which is the channel
 * udf_discstrat_finish_seq() sleeps on.
 *
 * NOTE(review): the declaration/initialisation of `empty', the call that
 * processes the current queue, the condition guarding the wait and the
 * thread exit call are not visible in this listing.
 */
549 udf_discstrat_thread(void *arg
)
551 struct udf_mount
*ump
= (struct udf_mount
*) arg
;
552 struct strat_private
*priv
= PRIV(ump
);
556 mutex_enter(&priv
->discstrat_mutex
);
/* keep draining after run_thread is cleared until every queue is empty */
557 while (priv
->run_thread
|| !empty
) {
558 /* process the current selected queue */
560 empty
= (bufq_peek(priv
->queues
[UDF_SHED_READING
]) == NULL
);
561 empty
&= (bufq_peek(priv
->queues
[UDF_SHED_WRITING
]) == NULL
);
562 empty
&= (bufq_peek(priv
->queues
[UDF_SHED_SEQWRITING
]) == NULL
);
564 /* wait for more if needed */
566 cv_timedwait(&priv
->discstrat_cv
,
567 &priv
->discstrat_mutex
, hz
/8);
569 mutex_exit(&priv
->discstrat_mutex
);
/* tell udf_discstrat_finish_seq() that the thread is done */
571 wakeup(&priv
->run_thread
);
576 /* --------------------------------------------------------------------- */
/*
 * Set up the sequential strategy for a mount.
 *
 * Requires ump->logical_vol to be set and ump->strategy_private to be NULL
 * (both asserted). Allocates and zeroes the struct strat_private,
 * initialises its condvar, mutex and descriptor pool (items of lb_size
 * bytes), saves the device's current strategy and sets a new one by name,
 * allocates the three buffer queues ("disksort" for the read and absolute
 * write queues, "fcfs" for the sequential write queue), timestamps them,
 * and finally starts udf_discstrat_thread with run_thread set to 1.
 * Failure to create the thread panics.
 *
 * NOTE(review): the declaration of lb_size, the tails of several calls
 * (pool_init, VOP_IOCTL, bufq_alloc, kthread_create) and some comment
 * delimiters are not visible in this listing.
 */
579 udf_discstrat_init_seq(struct udf_strat_args
*args
)
581 struct udf_mount
*ump
= args
->ump
;
582 struct strat_private
*priv
= PRIV(ump
);
583 struct disk_strategy dkstrat
;
587 KASSERT(ump
->logical_vol
);
588 KASSERT(priv
== NULL
);
590 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
591 KASSERT(lb_size
> 0);
593 /* initialise our memory space */
594 ump
->strategy_private
= malloc(sizeof(struct strat_private
),
595 M_UDFTEMP
, M_WAITOK
);
596 priv
= ump
->strategy_private
;
597 memset(priv
, 0 , sizeof(struct strat_private
));
599 /* initialise locks */
600 cv_init(&priv
->discstrat_cv
, "udfstrat");
601 mutex_init(&priv
->discstrat_mutex
, MUTEX_DEFAULT
, IPL_NONE
);
604 * Initialise pool for descriptors associated with nodes. This is done
605 * in lb_size units though currently lb_size is dictated to be
608 pool_init(&priv
->desc_pool
, lb_size
, 0, 0, 0, "udf_desc_pool", NULL
,
612 * remember old device strategy method and explicit set method
613 * `discsort' since we have our own more complex strategy that is not
614 * implementable on the CD device and other strategies will get in the
617 memset(&priv
->old_strategy_setting
, 0,
618 sizeof(struct disk_strategy
));
619 VOP_IOCTL(ump
->devvp
, DIOCGSTRATEGY
, &priv
->old_strategy_setting
,
620 FREAD
| FKIOCTL
, NOCRED
);
621 memset(&dkstrat
, 0, sizeof(struct disk_strategy
));
/*
 * NOTE(review): the name set here is "discsort" while the queues below are
 * allocated with "disksort"; the ioctl result is not checked in the visible
 * code -- verify that the device accepts this name.
 */
622 strcpy(dkstrat
.dks_name
, "discsort");
623 VOP_IOCTL(ump
->devvp
, DIOCSSTRATEGY
, &dkstrat
, FWRITE
| FKIOCTL
,
626 /* initialise our internal scheduler */
627 priv
->cur_queue
= UDF_SHED_READING
;
628 bufq_alloc(&priv
->queues
[UDF_SHED_READING
], "disksort",
630 bufq_alloc(&priv
->queues
[UDF_SHED_WRITING
], "disksort",
632 bufq_alloc(&priv
->queues
[UDF_SHED_SEQWRITING
], "fcfs", 0);
633 vfs_timestamp(&priv
->last_queued
[UDF_SHED_READING
]);
634 vfs_timestamp(&priv
->last_queued
[UDF_SHED_WRITING
]);
635 vfs_timestamp(&priv
->last_queued
[UDF_SHED_SEQWRITING
]);
637 /* create our disk strategy thread */
/* run_thread must be set before the thread starts; its loop tests it */
638 priv
->run_thread
= 1;
639 if (kthread_create(PRI_NONE
, 0 /* KTHREAD_MPSAFE*/, NULL
/* cpu_info*/,
640 udf_discstrat_thread
, ump
, &priv
->queue_lwp
,
642 panic("fork udf_rw");
/*
 * Tear down the sequential strategy for a mount.
 *
 * Clears run_thread to ask the strategy thread to stop, sleeps on
 * &priv->run_thread (the channel the thread wakes when it leaves its loop),
 * sets the device's strategy back to the one saved at init, destroys the
 * descriptor pool and frees the private state, leaving
 * ump->strategy_private NULL.
 *
 * NOTE(review): the declaration of error, the tails of the tsleep() and
 * VOP_IOCTL() calls and any checks around them are not visible in this
 * listing.
 */
648 udf_discstrat_finish_seq(struct udf_strat_args
*args
)
650 struct udf_mount
*ump
= args
->ump
;
651 struct strat_private
*priv
= PRIV(ump
);
657 /* stop our scheduling thread */
658 KASSERT(priv
->run_thread
== 1);
659 priv
->run_thread
= 0;
/*
 * NOTE(review): the thread waits in cv_timedwait() on discstrat_cv, which
 * is a different channel than this wakeup(); presumably the thread only
 * notices run_thread == 0 when its hz/8 timeout expires -- confirm.
 */
660 wakeup(priv
->queue_lwp
);
662 error
= tsleep(&priv
->run_thread
, PRIBIO
+1,
665 /* kthread should be finished now */
667 /* set back old device strategy method */
668 VOP_IOCTL(ump
->devvp
, DIOCSSTRATEGY
, &priv
->old_strategy_setting
,
671 /* destroy our pool */
672 pool_destroy(&priv
->desc_pool
);
674 /* free our private space */
675 free(ump
->strategy_private
, M_UDFTEMP
);
676 ump
->strategy_private
= NULL
;
679 /* --------------------------------------------------------------------- */
681 struct udf_strategy udf_strat_sequential
=
683 udf_create_logvol_dscr_seq
,
684 udf_free_logvol_dscr_seq
,
685 udf_read_logvol_dscr_seq
,
686 udf_write_logvol_dscr_seq
,
688 udf_discstrat_init_seq
,
689 udf_discstrat_finish_seq