1 /* $NetBSD: udf_strat_sequential.c,v 1.14 2015/10/06 08:57:34 hannken Exp $ */
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.14 2015/10/06 08:57:34 hannken Exp $");
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
66 #include "udf_bswap.h"
/*
 * Accessors for the private data pointers used throughout this file.
 * The macro argument is parenthesized so that expressions such as
 * `&array[i]' or `ptr + 1' can be passed safely.
 */
/* UDF in-core node stored in a vnode's v_data. */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* Strategy-private state stored in a mount's strategy_private. */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
72 /* --------------------------------------------------------------------- */
/* Number of internal scheduler queues; sizes the per-queue arrays. */
#define UDF_SHED_MAX		3

/* Queue indices, as selected by udf_queuebuf_seq(). */
#define UDF_SHED_READING	0	/* buffers with B_READ set */
#define UDF_SHED_WRITING	1	/* writes of type UDF_C_ABSOLUTE */
#define UDF_SHED_SEQWRITING	2	/* every other write */
81 struct strat_private
{
82 struct pool desc_pool
; /* node descriptors */
85 kcondvar_t discstrat_cv
; /* to wait on */
86 kmutex_t discstrat_mutex
; /* disc strategy */
88 int run_thread
; /* thread control */
91 struct disk_strategy old_strategy_setting
;
92 struct bufq_state
*queues
[UDF_SHED_MAX
];
93 struct timespec last_queued
[UDF_SHED_MAX
];
97 /* --------------------------------------------------------------------- */
/*
 * Completion callback for node descriptor writes; this is the function that
 * udf_write_logvol_dscr_seq() hands to udf_write_phys_dscr_async().
 *
 * It looks up the udf_node through buf->b_vp, drops one count from
 * outstanding_nodedscr and, once that reaches zero, unlocks the node and
 * wakes sleepers on &udf_node->outstanding_nodedscr.
 *
 * NOTE(review): this copy of the file lacks several lines of this function
 * (return type, braces, the condition guarding the IN_MODIFIED update --
 * presumably a write error, judging by the XXX comment). Confirm against
 * the complete source before changing behaviour.
 */
100 udf_wr_nodedscr_callback(struct buf
*buf
)
102 struct udf_node
*udf_node
;
105 KASSERT(buf
->b_data
);
107 /* called when write action is done */
108 DPRINTF(WRITE
, ("udf_wr_nodedscr_callback(): node written out\n"));
/* the node is recovered from the vnode the buffer belongs to */
110 udf_node
= VTOI(buf
->b_vp
);
111 if (udf_node
== NULL
) {
113 printf("udf_wr_node_callback: NULL node?\n");
117 /* XXX right flags to mark dirty again on error? */
119 udf_node
->i_flags
|= IN_MODIFIED
| IN_ACCESSED
;
120 /* XXX TODO reschedule on error */
123 /* decrement outstanding_nodedscr */
124 KASSERT(udf_node
->outstanding_nodedscr
>= 1);
125 udf_node
->outstanding_nodedscr
--;
126 if (udf_node
->outstanding_nodedscr
== 0) {
127 /* first unlock the node */
128 UDF_UNLOCK_NODE(udf_node
, 0);
129 wakeup(&udf_node
->outstanding_nodedscr
);
135 /* --------------------------------------------------------------------- */
/*
 * Allocate a fresh, zeroed descriptor for a node.
 *
 * Takes one item from the strategy's desc_pool (PR_WAITOK), stores it in
 * args->dscr and clears lb_size bytes of it, where lb_size is the logical
 * block size read from ump->logical_vol.
 *
 * NOTE(review): the return type, local declarations and return statement
 * are not present in this copy of the file.
 */
138 udf_create_logvol_dscr_seq(struct udf_strat_args
*args
)
140 union dscrptr
**dscrptr
= &args
->dscr
;
141 struct udf_mount
*ump
= args
->ump
;
142 struct strat_private
*priv
= PRIV(ump
);
145 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
146 *dscrptr
= pool_get(&priv
->desc_pool
, PR_WAITOK
);
147 memset(*dscrptr
, 0, lb_size
);
/*
 * Release a node descriptor: hands args->dscr back to the strategy's
 * desc_pool, the same pool the create/read routines in this file take
 * descriptors from.
 */
154 udf_free_logvol_dscr_seq(struct udf_strat_args
*args
)
156 union dscrptr
*dscr
= args
->dscr
;
157 struct udf_mount
*ump
= args
->ump
;
158 struct strat_private
*priv
= PRIV(ump
);
160 pool_put(&priv
->desc_pool
, dscr
);
/*
 * Read the node descriptor addressed by args->icb into a pool buffer.
 *
 * The ICB is translated to a sector with udf_translate_vtop(), the
 * descriptor is read into a temporary M_UDFTEMP buffer with
 * udf_read_phys_dscr(), lb_size bytes of it are copied into an item taken
 * from desc_pool (stored in args->dscr), and the temporary is freed.
 *
 * The output argument of udf_translate_vtop() had been corrupted to a
 * section sign; it is restored to `&sector', the local declared below.
 *
 * NOTE(review): the return type, some declarations and whatever error
 * checks follow the two calls are not present in this copy of the file.
 */
165 udf_read_logvol_dscr_seq(struct udf_strat_args
*args
)
167 union dscrptr
**dscrptr
= &args
->dscr
;
168 union dscrptr
*tmpdscr
;
169 struct udf_mount
*ump
= args
->ump
;
170 struct long_ad
*icb
= args
->icb
;
171 struct strat_private
*priv
= PRIV(ump
);
173 uint32_t sector
, dummy
;
176 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
178 error
= udf_translate_vtop(ump
, icb
, &sector
, &dummy
);
182 /* try to read in fe/efe */
183 error
= udf_read_phys_dscr(ump
, sector
, M_UDFTEMP
, &tmpdscr
);
/* move the descriptor into pool-backed storage and drop the temporary */
187 *dscrptr
= pool_get(&priv
->desc_pool
, PR_WAITOK
);
188 memcpy(*dscrptr
, tmpdscr
, lb_size
);
189 free(tmpdscr
, M_UDFTEMP
);
/*
 * Write out the node descriptor args->dscr for args->udf_node.
 *
 * The virtual partition number comes from the node's own location and the
 * logical sector from args->icb. When that partition is not of type
 * UDF_VTOP_TYPE_VIRT the ICB is translated to a physical sector first.
 * The descriptor is then written either with udf_write_phys_dscr_sync() or
 * with udf_write_phys_dscr_async(), the latter completing through
 * udf_wr_nodedscr_callback().
 *
 * The output argument of udf_translate_vtop() had been corrupted to a
 * section sign; it is restored to `&sectornr', the local declared below.
 *
 * NOTE(review): the conditions selecting the sync/async path (presumably
 * args->waitfor) and guarding the trailing unlock are not present in this
 * copy of the file; neither are the return type and some declarations.
 */
196 udf_write_logvol_dscr_seq(struct udf_strat_args
*args
)
198 union dscrptr
*dscr
= args
->dscr
;
199 struct udf_mount
*ump
= args
->ump
;
200 struct udf_node
*udf_node
= args
->udf_node
;
201 struct long_ad
*icb
= args
->icb
;
202 int waitfor
= args
->waitfor
;
203 uint32_t logsectornr
, sectornr
, dummy
;
207 * we have to decide if we write it out sequential or at its fixed
208 * position by examining the partition its (to be) written on.
210 vpart
= udf_rw16(udf_node
->loc
.loc
.part_num
);
211 logsectornr
= udf_rw32(icb
->loc
.lb_num
);
213 if (ump
->vtop_tp
[vpart
] != UDF_VTOP_TYPE_VIRT
) {
214 error
= udf_translate_vtop(ump
, icb
, &sectornr
, &dummy
);
220 DPRINTF(WRITE
, ("udf_write_logvol_dscr: sync write\n"));
222 error
= udf_write_phys_dscr_sync(ump
, udf_node
, UDF_C_NODE
,
223 dscr
, sectornr
, logsectornr
);
225 DPRINTF(WRITE
, ("udf_write_logvol_dscr: no wait, async write\n"));
227 error
= udf_write_phys_dscr_async(ump
, udf_node
, UDF_C_NODE
,
228 dscr
, sectornr
, logsectornr
, udf_wr_nodedscr_callback
);
229 /* will be UNLOCKED in call back */
/* same bookkeeping as the callback: last outstanding write unlocks the node */
233 udf_node
->outstanding_nodedscr
--;
234 if (udf_node
->outstanding_nodedscr
== 0) {
235 UDF_UNLOCK_NODE(udf_node
, 0);
236 wakeup(&udf_node
->outstanding_nodedscr
);
242 /* --------------------------------------------------------------------- */
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * such a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
/*
 * Queue args->nestbuf on one of the strategy's internal queues and poke the
 * scheduling thread.
 *
 * Queue selection: UDF_SHED_READING by default; a buffer without B_READ
 * goes to UDF_SHED_SEQWRITING, unless its b_udf_c_type is UDF_C_ABSOLUTE,
 * in which case it goes to UDF_SHED_WRITING. The insertion and the
 * last_queued timestamp update happen under discstrat_mutex; the condvar
 * is signalled after the mutex is released.
 */
257 udf_queuebuf_seq(struct udf_strat_args
*args
)
259 struct udf_mount
*ump
= args
->ump
;
260 struct buf
*nestbuf
= args
->nestbuf
;
261 struct strat_private
*priv
= PRIV(ump
);
267 KASSERT(nestbuf
->b_iodone
== nestiobuf_iodone
);
269 what
= nestbuf
->b_udf_c_type
;
270 queue
= UDF_SHED_READING
;
271 if ((nestbuf
->b_flags
& B_READ
) == 0) {
273 queue
= UDF_SHED_SEQWRITING
;
274 if (what
== UDF_C_ABSOLUTE
)
275 queue
= UDF_SHED_WRITING
;
278 /* use our own scheduler lists for more complex scheduling */
279 mutex_enter(&priv
->discstrat_mutex
);
280 bufq_put(priv
->queues
[queue
], nestbuf
);
281 vfs_timestamp(&priv
->last_queued
[queue
]);
282 mutex_exit(&priv
->discstrat_mutex
);
284 /* signal our thread that there might be something to do */
285 cv_signal(&priv
->discstrat_cv
);
288 /* --------------------------------------------------------------------- */
290 /* TODO convert to lb_size */
/*
 * Record in the VAT where the descriptor carried by `buf' was written.
 *
 * ump:    mount; must have a vat_node and VAT allocation on node_part
 *         (both asserted).
 * buf:    buffer whose b_data is the descriptor and whose b_vp identifies
 *         the node.
 * lb_map: sector number to record; stored through udf_rw32().
 *
 * Only buffers of type UDF_C_NODE are of interest. For the VAT node itself
 * just the descriptor's tag_loc is set and the tag checksum revalidated.
 * For any other node the 4-byte mapping is written into the VAT file at
 * vat_offset + lb_num * 4, with lb_num taken from the descriptor's tag_loc,
 * while holding allocate_mutex.
 *
 * NOTE(review): the early returns, the error check in front of the panic
 * and some declarations are not present in this copy of the file.
 */
292 udf_VAT_mapping_update(struct udf_mount
*ump
, struct buf
*buf
, uint32_t lb_map
)
294 union dscrptr
*fdscr
= (union dscrptr
*) buf
->b_data
;
295 struct vnode
*vp
= buf
->b_vp
;
296 struct udf_node
*udf_node
= VTOI(vp
);
298 uint32_t udf_rw32_lbmap
;
299 int c_type
= buf
->b_udf_c_type
;
302 /* only interested when we're using a VAT */
303 KASSERT(ump
->vat_node
);
304 KASSERT(ump
->vtop_alloc
[ump
->node_part
] == UDF_ALLOC_VAT
);
306 /* only nodes are recorded in the VAT */
307 /* NOTE: and the fileset descriptor (FIXME ?) */
308 if (c_type
!= UDF_C_NODE
)
/* converted once with udf_rw32(); used both for tag_loc and the VAT entry */
311 udf_rw32_lbmap
= udf_rw32(lb_map
);
313 /* if we're the VAT itself, only update our assigned sector number */
314 if (udf_node
== ump
->vat_node
) {
315 fdscr
->tag
.tag_loc
= udf_rw32_lbmap
;
316 udf_validate_tag_sum(fdscr
);
317 DPRINTF(TRANSLATE
, ("VAT assigned to sector %u\n",
318 udf_rw32(udf_rw32_lbmap
)));
319 /* no use mapping the VAT node in the VAT */
323 /* record new position in VAT file */
324 lb_num
= udf_rw32(fdscr
->tag
.tag_loc
);
326 /* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
328 DPRINTF(TRANSLATE
, ("VAT entry change (log %u -> phys %u)\n",
331 /* VAT should be the longer than this write, can't go wrong */
332 KASSERT(lb_num
<= ump
->vat_entries
);
334 mutex_enter(&ump
->allocate_mutex
);
335 error
= udf_vat_write(ump
->vat_node
,
336 (uint8_t *) &udf_rw32_lbmap
, 4,
337 ump
->vat_offset
+ lb_num
* 4);
338 mutex_exit(&ump
->allocate_mutex
);
341 panic( "udf_VAT_mapping_update: HELP! i couldn't "
342 "write in the VAT file ?\n");
/*
 * Hand one buffer taken from queue `queue' to the underlying device.
 *
 * UDF_SHED_READING and UDF_SHED_WRITING buffers go straight to
 * VOP_STRATEGY() on ump->devvp (the latter must be UDF_C_ABSOLUTE).
 *
 * UDF_SHED_SEQWRITING buffers are first given their disc address:
 *  - udf_late_allocate_buf() fills ump->la_lmapping / la_node_ad_cpy and
 *    reports the virtual partition used;
 *  - b_blkno becomes (first mapping + partition start) * blks, with
 *    blks = sector_size / DEV_BSIZE;
 *  - UDF_C_FLOAT_DSCR: tag_loc is set to the absolute position and the tag
 *    and CRC sums are revalidated;
 *  - UDF_C_NODE: the VAT mapping is updated and node internals fixed up;
 *  - UDF_C_FIDS: FID blocks are fixed up sector by sector;
 * and then passed to VOP_STRATEGY().
 *
 * NOTE(review): returns, closing braces, several declarations and the loop
 * around the FID fixup are not present in this copy of the file.
 */
347 udf_issue_buf(struct udf_mount
*ump
, int queue
, struct buf
*buf
)
350 struct long_ad
*node_ad_cpy
;
351 struct part_desc
*pdesc
;
352 uint64_t *lmapping
, *lmappos
;
353 uint32_t sectornr
, bpos
;
357 int sector_size
= ump
->discinfo
.sector_size
;
358 int blks
= sector_size
/ DEV_BSIZE
;
361 /* if reading, just pass to the device's STRATEGY */
362 if (queue
== UDF_SHED_READING
) {
363 DPRINTF(SHEDULE
, ("\nudf_issue_buf READ %p : sector %d type %d,"
364 "b_resid %d, b_bcount %d, b_bufsize %d\n",
365 buf
, (uint32_t) buf
->b_blkno
/ blks
, buf
->b_udf_c_type
,
366 buf
->b_resid
, buf
->b_bcount
, buf
->b_bufsize
));
367 VOP_STRATEGY(ump
->devvp
, buf
);
/* writes at a fixed (absolute) position need no late allocation */
371 if (queue
== UDF_SHED_WRITING
) {
372 DPRINTF(SHEDULE
, ("\nudf_issue_buf WRITE %p : sector %d "
373 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
374 buf
, (uint32_t) buf
->b_blkno
/ blks
, buf
->b_udf_c_type
,
375 buf
->b_resid
, buf
->b_bcount
, buf
->b_bufsize
));
376 KASSERT(buf
->b_udf_c_type
== UDF_C_ABSOLUTE
);
378 // udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
379 VOP_STRATEGY(ump
->devvp
, buf
);
383 KASSERT(queue
== UDF_SHED_SEQWRITING
);
384 DPRINTF(SHEDULE
, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
385 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
386 buf
, buf
->b_udf_c_type
, buf
->b_resid
, buf
->b_bcount
,
390 * Buffers should not have been allocated to disc addresses yet on
391 * this queue. Note that a buffer can get multiple extents allocated.
393 * lmapping contains lb_num relative to base partition.
395 lmapping
= ump
->la_lmapping
;
396 node_ad_cpy
= ump
->la_node_ad_cpy
;
398 /* logically allocate buf and map it in the file */
399 udf_late_allocate_buf(ump
, buf
, lmapping
, node_ad_cpy
, &vpart_num
);
402 * NOTE We are using the knowledge here that sequential media will
403 * always be mapped linearly. Thus no use to explicitly translate the
407 /* calculate offset from physical base partition */
408 pdesc
= ump
->partitions
[ump
->vtop
[vpart_num
]];
409 ptov
= udf_rw32(pdesc
->start_loc
);
411 /* set buffers blkno to the physical block number */
412 buf
->b_blkno
= (*lmapping
+ ptov
) * blks
;
414 /* fixate floating descriptors */
415 if (buf
->b_udf_c_type
== UDF_C_FLOAT_DSCR
) {
416 /* set our tag location to the absolute position */
417 dscr
= (union dscrptr
*) buf
->b_data
;
418 dscr
->tag
.tag_loc
= udf_rw32(*lmapping
+ ptov
);
419 udf_validate_tag_and_crc_sums(dscr
);
422 /* update mapping in the VAT */
423 if (buf
->b_udf_c_type
== UDF_C_NODE
) {
424 udf_VAT_mapping_update(ump
, buf
, *lmapping
);
425 udf_fixup_node_internals(ump
, buf
->b_data
, buf
->b_udf_c_type
);
428 /* if we have FIDs, fixup using the new allocation table */
429 if (buf
->b_udf_c_type
== UDF_C_FIDS
) {
430 buf_len
= buf
->b_bcount
;
434 sectornr
= *lmappos
++;
435 len
= MIN(buf_len
, sector_size
);
436 fidblk
= (uint8_t *) buf
->b_data
+ bpos
;
437 udf_fixup_fid_block(fidblk
, sector_size
,
444 VOP_STRATEGY(ump
->devvp
, buf
);
/*
 * One scheduling step for the strategy thread.
 *
 * Takes a buffer from the current queue and issues it through
 * udf_issue_buf() with discstrat_mutex dropped. The buffer's b_iodone is
 * parked and B_ASYNC cleared around the issue; afterwards b_iodone is
 * restored and invoked to propagate the result. A write error currently
 * panics.
 *
 * It then decides whether to change queues. On MMC_CLASS_CD media no switch
 * is considered within 3 seconds of the last request queued on the current
 * queue. Otherwise the candidate is chosen by three successive checks, each
 * overriding the previous one: READING, WRITING, SEQWRITING. Going from
 * READING to SEQWRITING refreshes the track info (mutex dropped) first.
 *
 * NOTE(review): the loop/branch structure, returns, several declarations
 * and the first udf_update_trackinfo() argument are not present in this
 * copy of the file; the caller evidently holds discstrat_mutex on entry,
 * since the function exits it before issuing -- confirm.
 */
449 udf_doshedule(struct udf_mount
*ump
)
452 struct timespec now
, *last
;
453 struct strat_private
*priv
= PRIV(ump
);
454 void (*b_callback
)(struct buf
*);
458 buf
= bufq_get(priv
->queues
[priv
->cur_queue
]);
460 /* transfer from the current queue to the device queue */
461 mutex_exit(&priv
->discstrat_mutex
);
463 /* transform buffer to synchronous; XXX needed? */
464 b_callback
= buf
->b_iodone
;
465 buf
->b_iodone
= NULL
;
466 CLR(buf
->b_flags
, B_ASYNC
);
468 /* issue and wait on completion */
469 udf_issue_buf(ump
, priv
->cur_queue
, buf
);
472 mutex_enter(&priv
->discstrat_mutex
);
474 /* if there is an error, repair this error, otherwise propagate */
475 if (buf
->b_error
&& ((buf
->b_flags
& B_READ
) == 0)) {
476 /* check what we need to do */
477 panic("UDF write error, can't handle yet!\n");
480 /* propagate result to higher layers */
482 buf
->b_iodone
= b_callback
;
483 (*buf
->b_iodone
)(buf
);
489 /* Check if we're idling in this state */
491 last
= &priv
->last_queued
[priv
->cur_queue
];
492 if (ump
->discinfo
.mmc_class
== MMC_CLASS_CD
) {
493 /* dont switch too fast for CD media; its expensive in time */
494 if (now
.tv_sec
- last
->tv_sec
< 3)
498 /* check if we can/should switch */
499 new_queue
= priv
->cur_queue
;
/* later checks override earlier ones */
501 if (bufq_peek(priv
->queues
[UDF_SHED_READING
]))
502 new_queue
= UDF_SHED_READING
;
503 if (bufq_peek(priv
->queues
[UDF_SHED_WRITING
])) /* only for unmount */
504 new_queue
= UDF_SHED_WRITING
;
505 if (bufq_peek(priv
->queues
[UDF_SHED_SEQWRITING
]))
506 new_queue
= UDF_SHED_SEQWRITING
;
507 if (priv
->cur_queue
== UDF_SHED_READING
) {
508 if (new_queue
== UDF_SHED_SEQWRITING
) {
509 /* TODO use flag to signal if this is needed */
510 mutex_exit(&priv
->discstrat_mutex
);
512 /* update trackinfo for data and metadata */
513 error
= udf_update_trackinfo(ump
,
516 error
= udf_update_trackinfo(ump
,
517 &ump
->metadata_track
);
519 mutex_enter(&priv
->discstrat_mutex
);
524 if (new_queue
!= priv
->cur_queue
) {
525 DPRINTF(SHEDULE
, ("switching from %d to %d\n",
526 priv
->cur_queue
, new_queue
));
529 priv
->cur_queue
= new_queue
;
/*
 * Body of the scheduling kernel thread started by udf_discstrat_init_seq().
 * `arg' is the struct udf_mount.
 *
 * Runs with discstrat_mutex held for as long as run_thread is set or any of
 * the three queues still holds a buffer. Each pass recomputes `empty' from
 * the queues and waits on discstrat_cv with a timeout of hz/8 ticks. On
 * exit it releases the mutex and does wakeup(&priv->run_thread), the
 * channel udf_discstrat_finish_seq() sleeps on.
 *
 * NOTE(review): the statement that processes the current queue (presumably
 * a call to udf_doshedule()), the condition in front of the wait, and the
 * thread exit call are not present in this copy of the file.
 */
534 udf_discstrat_thread(void *arg
)
536 struct udf_mount
*ump
= (struct udf_mount
*) arg
;
537 struct strat_private
*priv
= PRIV(ump
);
541 mutex_enter(&priv
->discstrat_mutex
);
542 while (priv
->run_thread
|| !empty
) {
543 /* process the current selected queue */
/* empty is true only when all three queues have nothing queued */
545 empty
= (bufq_peek(priv
->queues
[UDF_SHED_READING
]) == NULL
);
546 empty
&= (bufq_peek(priv
->queues
[UDF_SHED_WRITING
]) == NULL
);
547 empty
&= (bufq_peek(priv
->queues
[UDF_SHED_SEQWRITING
]) == NULL
);
549 /* wait for more if needed */
551 cv_timedwait(&priv
->discstrat_cv
,
552 &priv
->discstrat_mutex
, hz
/8);
554 mutex_exit(&priv
->discstrat_mutex
);
556 wakeup(&priv
->run_thread
);
561 /* --------------------------------------------------------------------- */
/*
 * Set up the sequential strategy for a mount.
 *
 * Requires ump->logical_vol and no existing private state (both asserted).
 * Steps, in order: allocate and zero struct strat_private (M_UDFTEMP,
 * M_WAITOK); initialise the condvar and mutex; create desc_pool with items
 * of lb_size bytes; save the device's current strategy with DIOCGSTRATEGY
 * and select "discsort" with DIOCSSTRATEGY; allocate the three buffer
 * queues ("disksort" for READING and WRITING, "fcfs" for SEQWRITING) and
 * timestamp them; set run_thread and start udf_discstrat_thread() with
 * kthread_create(), panicking if that fails.
 *
 * NOTE(review): the return values of both VOP_IOCTL() calls are not
 * examined in the lines visible here; trailing arguments of several calls
 * and the function's return type are not present in this copy of the file.
 */
564 udf_discstrat_init_seq(struct udf_strat_args
*args
)
566 struct udf_mount
*ump
= args
->ump
;
567 struct strat_private
*priv
= PRIV(ump
);
568 struct disk_strategy dkstrat
;
572 KASSERT(ump
->logical_vol
);
573 KASSERT(priv
== NULL
);
575 lb_size
= udf_rw32(ump
->logical_vol
->lb_size
);
576 KASSERT(lb_size
> 0);
578 /* initialise our memory space */
579 ump
->strategy_private
= malloc(sizeof(struct strat_private
),
580 M_UDFTEMP
, M_WAITOK
);
581 priv
= ump
->strategy_private
;
582 memset(priv
, 0 , sizeof(struct strat_private
));
584 /* initialise locks */
585 cv_init(&priv
->discstrat_cv
, "udfstrat");
586 mutex_init(&priv
->discstrat_mutex
, MUTEX_DEFAULT
, IPL_NONE
);
589 * Initialise pool for descriptors associated with nodes. This is done
590 * in lb_size units though currently lb_size is dictated to be
593 pool_init(&priv
->desc_pool
, lb_size
, 0, 0, 0, "udf_desc_pool", NULL
,
597 * remember old device strategy method and explicit set method
598 * `discsort' since we have our own more complex strategy that is not
599 * implementable on the CD device and other strategies will get in the
602 memset(&priv
->old_strategy_setting
, 0,
603 sizeof(struct disk_strategy
));
604 VOP_IOCTL(ump
->devvp
, DIOCGSTRATEGY
, &priv
->old_strategy_setting
,
605 FREAD
| FKIOCTL
, NOCRED
);
606 memset(&dkstrat
, 0, sizeof(struct disk_strategy
));
607 strcpy(dkstrat
.dks_name
, "discsort");
608 VOP_IOCTL(ump
->devvp
, DIOCSSTRATEGY
, &dkstrat
, FWRITE
| FKIOCTL
,
611 /* initialise our internal sheduler */
612 priv
->cur_queue
= UDF_SHED_READING
;
613 bufq_alloc(&priv
->queues
[UDF_SHED_READING
], "disksort",
615 bufq_alloc(&priv
->queues
[UDF_SHED_WRITING
], "disksort",
617 bufq_alloc(&priv
->queues
[UDF_SHED_SEQWRITING
], "fcfs", 0);
618 vfs_timestamp(&priv
->last_queued
[UDF_SHED_READING
]);
619 vfs_timestamp(&priv
->last_queued
[UDF_SHED_WRITING
]);
620 vfs_timestamp(&priv
->last_queued
[UDF_SHED_SEQWRITING
]);
622 /* create our disk strategy thread */
623 priv
->run_thread
= 1;
624 if (kthread_create(PRI_NONE
, 0 /* KTHREAD_MPSAFE*/, NULL
/* cpu_info*/,
625 udf_discstrat_thread
, ump
, &priv
->queue_lwp
,
627 panic("fork udf_rw");
/*
 * Tear down the sequential strategy for a mount.
 *
 * Clears run_thread so udf_discstrat_thread() leaves its loop once the
 * queues are empty, then sleeps on &priv->run_thread, the channel the
 * thread wakes on exit. Afterwards it restores the device strategy saved
 * at init time, destroys the descriptor pool, mutex and condvar, frees the
 * private state and resets ump->strategy_private to NULL.
 *
 * NOTE(review): the thread waits in cv_timedwait() on discstrat_cv, whereas
 * this function uses wakeup(priv->queue_lwp); as visible here the thread
 * would notice the cleared flag through its hz/8 timeout rather than this
 * wakeup -- confirm. The tsleep() arguments after the priority and any
 * handling of its result are not present in this copy of the file.
 */
633 udf_discstrat_finish_seq(struct udf_strat_args
*args
)
635 struct udf_mount
*ump
= args
->ump
;
636 struct strat_private
*priv
= PRIV(ump
);
642 /* stop our scheduling thread */
643 KASSERT(priv
->run_thread
== 1);
644 priv
->run_thread
= 0;
645 wakeup(priv
->queue_lwp
);
647 error
= tsleep(&priv
->run_thread
, PRIBIO
+1,
650 /* kthread should be finished now */
652 /* set back old device strategy method */
653 VOP_IOCTL(ump
->devvp
, DIOCSSTRATEGY
, &priv
->old_strategy_setting
,
656 /* destroy our pool */
657 pool_destroy(&priv
->desc_pool
);
659 mutex_destroy(&priv
->discstrat_mutex
);
660 cv_destroy(&priv
->discstrat_cv
);
662 /* free our private space */
663 free(ump
->strategy_private
, M_UDFTEMP
);
664 ump
->strategy_private
= NULL
;
667 /* --------------------------------------------------------------------- */
/*
 * Strategy table exporting this file's routines; the initialisers are
 * positional, so their order must match struct udf_strategy.
 *
 * NOTE(review): the braces and one initialiser between the write and init
 * entries are not present in this copy of the file. udf_queuebuf_seq() is
 * defined above and referenced nowhere else here, so it is presumably the
 * missing entry -- confirm against struct udf_strategy.
 */
669 struct udf_strategy udf_strat_sequential
=
671 udf_create_logvol_dscr_seq
,
672 udf_free_logvol_dscr_seq
,
673 udf_read_logvol_dscr_seq
,
674 udf_write_logvol_dscr_seq
,
676 udf_discstrat_init_seq
,
677 udf_discstrat_finish_seq