dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / udfs / udf_inode.c
blob9c6ed55ca3b6212a5ecbdc7f56524f32cc201072
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/time.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/resource.h>
33 #include <sys/signal.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/stat.h>
39 #include <sys/vnode.h>
40 #include <sys/mode.h>
41 #include <sys/proc.h>
42 #include <sys/disp.h>
43 #include <sys/file.h>
44 #include <sys/fcntl.h>
45 #include <sys/flock.h>
46 #include <sys/kmem.h>
47 #include <sys/uio.h>
48 #include <sys/dnlc.h>
49 #include <sys/conf.h>
50 #include <sys/errno.h>
51 #include <sys/mman.h>
52 #include <sys/fbuf.h>
53 #include <sys/pathname.h>
54 #include <sys/debug.h>
55 #include <sys/vmsystm.h>
56 #include <sys/cmn_err.h>
57 #include <sys/dirent.h>
58 #include <sys/errno.h>
59 #include <sys/modctl.h>
60 #include <sys/statvfs.h>
61 #include <sys/mount.h>
62 #include <sys/sunddi.h>
63 #include <sys/bootconf.h>
64 #include <sys/policy.h>
66 #include <vm/hat.h>
67 #include <vm/page.h>
68 #include <vm/pvn.h>
69 #include <vm/as.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_kmem.h>
73 #include <vm/seg_vn.h>
74 #include <vm/rm.h>
75 #include <vm/page.h>
76 #include <sys/swap.h>
79 #include <sys/fs_subr.h>
82 #include <sys/fs/udf_volume.h>
83 #include <sys/fs/udf_inode.h>
85 extern const struct vnodeops udf_vnodeops;
87 kmutex_t ud_sync_busy;
89 * udf_vfs list manipulation routines
91 kmutex_t udf_vfs_mutex;
92 struct udf_vfs *udf_vfs_instances;
93 _NOTE(MUTEX_PROTECTS_DATA(udf_vfs_mutex, udf_vfs_instances))
95 union ihead ud_ihead[UD_HASH_SZ];
96 kmutex_t ud_icache_lock;
98 #define UD_BEGIN 0x0
99 #define UD_END 0x1
100 #define UD_UNKN 0x2
101 struct ud_inode *udf_ifreeh, *udf_ifreet;
102 kmutex_t udf_ifree_lock;
103 _NOTE(MUTEX_PROTECTS_DATA(udf_ifree_lock, udf_ifreeh))
104 _NOTE(MUTEX_PROTECTS_DATA(udf_ifree_lock, udf_ifreet))
106 kmutex_t ud_nino_lock;
107 int32_t ud_max_inodes = 512;
108 int32_t ud_cur_inodes = 0;
109 _NOTE(MUTEX_PROTECTS_DATA(ud_nino_lock, ud_cur_inodes))
111 uid_t ud_default_uid = 0;
112 gid_t ud_default_gid = 3;
114 int32_t ud_updat_ext4(struct ud_inode *, struct file_entry *);
115 int32_t ud_updat_ext4096(struct ud_inode *, struct file_entry *);
116 void ud_make_sad(struct icb_ext *, struct short_ad *, int32_t);
117 void ud_make_lad(struct icb_ext *, struct long_ad *, int32_t);
118 void ud_trunc_ext4(struct ud_inode *, uoff_t);
119 void ud_trunc_ext4096(struct ud_inode *, uoff_t);
120 void ud_add_to_free_list(struct ud_inode *, uint32_t);
121 void ud_remove_from_free_list(struct ud_inode *, uint32_t);
#ifdef	DEBUG
/*
 * Debug-only probe of the inode hash.
 *
 * Walks the hash chain selected by (vfsp->vfs_dev, translated disk
 * address of prn/ploc) under ud_icache_lock and returns the inode whose
 * i_icb_prn, i_icb_block and i_dev all match, or NULL if none does.
 * No hold is taken on the returned inode and the lock is dropped before
 * returning.
 */
struct ud_inode *
ud_search_icache(struct vfs *vfsp, uint16_t prn, uint32_t ploc)
{
	struct udf_vfs *udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
	struct ud_inode *found = NULL;
	struct ud_inode *cur;
	union ihead *bucket;
	uint32_t daddr, unused;
	int32_t slot;

	daddr = ud_xlate_to_daddr(udf_vfsp, prn, ploc, 1, &unused);

	mutex_enter(&ud_icache_lock);
	slot = UD_INOHASH(vfsp->vfs_dev, daddr);
	bucket = &ud_ihead[slot];

	/* The chain is circular; the bucket head doubles as the sentinel. */
	cur = bucket->ih_chain[0];
	while (cur != (struct ud_inode *)bucket) {
		if ((prn == cur->i_icb_prn) && (ploc == cur->i_icb_block) &&
		    (vfsp->vfs_dev == cur->i_dev)) {
			found = cur;
			break;
		}
		cur = cur->i_forw;
	}
	mutex_exit(&ud_icache_lock);

	return (found);
}
#endif

154 /* ARGSUSED */
156 ud_iget(struct vfs *vfsp, uint16_t prn, uint32_t ploc, struct ud_inode **ipp,
157 struct buf *pbp, struct cred *cred)
159 int32_t hno, nomem = 0, icb_tag_flags;
160 union ihead *ih;
161 struct ud_inode *ip;
162 struct vnode *vp;
163 struct buf *bp = NULL;
164 struct file_entry *fe;
165 struct udf_vfs *udf_vfsp;
166 struct ext_attr_hdr *eah;
167 struct attr_hdr *ah;
168 int32_t ea_len, ea_off;
169 daddr_t loc;
170 uint64_t offset = 0;
171 struct icb_ext *iext, *con;
172 uint32_t length, dummy;
173 int32_t ndesc, ftype;
174 uint16_t old_prn;
175 uint32_t old_block, old_lbano;
177 ud_printf("ud_iget\n");
178 udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
179 old_prn = 0;
180 old_block = old_lbano = 0;
181 ftype = 0;
182 loc = ud_xlate_to_daddr(udf_vfsp, prn, ploc, 1, &dummy);
183 loop:
184 mutex_enter(&ud_icache_lock);
185 hno = UD_INOHASH(vfsp->vfs_dev, loc);
187 ih = &ud_ihead[hno];
188 for (ip = ih->ih_chain[0];
189 ip != (struct ud_inode *)ih;
190 ip = ip->i_forw) {
192 if ((prn == ip->i_icb_prn) &&
193 (ploc == ip->i_icb_block) &&
194 (vfsp->vfs_dev == ip->i_dev)) {
196 vp = ITOV(ip);
197 VN_HOLD(vp);
198 mutex_exit(&ud_icache_lock);
200 rw_enter(&ip->i_contents, RW_READER);
201 mutex_enter(&ip->i_tlock);
202 if ((ip->i_flag & IREF) == 0) {
203 mutex_enter(&udf_ifree_lock);
204 ud_remove_from_free_list(ip, UD_UNKN);
205 mutex_exit(&udf_ifree_lock);
207 ip->i_flag |= IREF;
208 mutex_exit(&ip->i_tlock);
209 rw_exit(&ip->i_contents);
211 *ipp = ip;
213 if (pbp != NULL) {
214 brelse(pbp);
217 return (0);
222 * We don't have it in the cache
223 * Allocate a new entry
225 tryagain:
226 mutex_enter(&udf_ifree_lock);
227 mutex_enter(&ud_nino_lock);
228 if (ud_cur_inodes > ud_max_inodes) {
229 int32_t purged;
231 mutex_exit(&ud_nino_lock);
232 while (udf_ifreeh == NULL ||
233 vn_has_cached_data(ITOV(udf_ifreeh))) {
235 * Try to put an inode on the freelist that's
236 * sitting in the dnlc.
238 mutex_exit(&udf_ifree_lock);
239 purged = dnlc_fs_purge1(&udf_vnodeops);
240 mutex_enter(&udf_ifree_lock);
241 if (!purged) {
242 break;
245 mutex_enter(&ud_nino_lock);
249 * If there's a free one available and it has no pages attached
250 * take it. If we're over the high water mark, take it even if
251 * it has attached pages. Otherwise, make a new one.
253 if (udf_ifreeh &&
254 (nomem || !vn_has_cached_data(ITOV(udf_ifreeh)) ||
255 ud_cur_inodes >= ud_max_inodes)) {
257 mutex_exit(&ud_nino_lock);
258 ip = udf_ifreeh;
259 vp = ITOV(ip);
261 ud_remove_from_free_list(ip, UD_BEGIN);
263 mutex_exit(&udf_ifree_lock);
264 if (ip->i_flag & IREF) {
265 cmn_err(CE_WARN, "ud_iget: bad i_flag\n");
266 mutex_exit(&ud_icache_lock);
267 if (pbp != NULL) {
268 brelse(pbp);
270 return (EINVAL);
272 rw_enter(&ip->i_contents, RW_WRITER);
275 * We call udf_syncip() to synchronously destroy all pages
276 * associated with the vnode before re-using it. The pageout
277 * thread may have beat us to this page so our v_count can
278 * be > 0 at this point even though we are on the freelist.
280 mutex_enter(&ip->i_tlock);
281 ip->i_flag = (ip->i_flag & IMODTIME) | IREF;
282 mutex_exit(&ip->i_tlock);
284 VN_HOLD(vp);
285 if (ud_syncip(ip, B_INVAL, I_SYNC) != 0) {
286 ud_idrop(ip);
287 rw_exit(&ip->i_contents);
288 mutex_exit(&ud_icache_lock);
289 goto loop;
292 mutex_enter(&ip->i_tlock);
293 ip->i_flag &= ~IMODTIME;
294 mutex_exit(&ip->i_tlock);
296 if (ip->i_ext) {
297 kmem_free(ip->i_ext,
298 sizeof (struct icb_ext) * ip->i_ext_count);
299 ip->i_ext = 0;
300 ip->i_ext_count = ip->i_ext_used = 0;
303 if (ip->i_con) {
304 kmem_free(ip->i_con,
305 sizeof (struct icb_ext) * ip->i_con_count);
306 ip->i_con = 0;
307 ip->i_con_count = ip->i_con_used = ip->i_con_read = 0;
311 * The pageout thread may not have had a chance to release
312 * its hold on the vnode (if it was active with this vp),
313 * but the pages should all be invalidated.
315 } else {
316 mutex_exit(&ud_nino_lock);
317 mutex_exit(&udf_ifree_lock);
319 * Try to get memory for this inode without blocking.
320 * If we can't and there is something on the freelist,
321 * go ahead and use it, otherwise block waiting for
322 * memory holding the hash_lock. We expose a potential
323 * deadlock if all users of memory have to do a ud_iget()
324 * before releasing memory.
326 ip = kmem_zalloc(sizeof (struct ud_inode),
327 KM_NOSLEEP);
328 vp = vn_alloc(KM_NOSLEEP);
329 if ((ip == NULL) || (vp == NULL)) {
330 mutex_enter(&udf_ifree_lock);
331 if (udf_ifreeh) {
332 mutex_exit(&udf_ifree_lock);
333 if (ip != NULL)
334 kmem_free(ip, sizeof (struct ud_inode));
335 if (vp != NULL)
336 vn_free(vp);
337 nomem = 1;
338 goto tryagain;
339 } else {
340 mutex_exit(&udf_ifree_lock);
341 if (ip == NULL)
342 ip = (struct ud_inode *)
343 kmem_zalloc(
344 sizeof (struct ud_inode),
345 KM_SLEEP);
346 if (vp == NULL)
347 vp = vn_alloc(KM_SLEEP);
350 ip->i_vnode = vp;
352 ip->i_marker1 = (uint32_t)0xAAAAAAAA;
353 ip->i_marker2 = (uint32_t)0xBBBBBBBB;
354 ip->i_marker3 = (uint32_t)0xCCCCCCCC;
356 rw_init(&ip->i_rwlock, NULL, RW_DEFAULT, NULL);
357 rw_init(&ip->i_contents, NULL, RW_DEFAULT, NULL);
358 mutex_init(&ip->i_tlock, NULL, MUTEX_DEFAULT, NULL);
360 ip->i_forw = ip;
361 ip->i_back = ip;
362 vp->v_data = (caddr_t)ip;
363 vn_setops(vp, &udf_vnodeops);
364 ip->i_flag = IREF;
365 cv_init(&ip->i_wrcv, NULL, CV_DRIVER, NULL);
366 mutex_enter(&ud_nino_lock);
367 ud_cur_inodes++;
368 mutex_exit(&ud_nino_lock);
370 rw_enter(&ip->i_contents, RW_WRITER);
373 if (vp->v_count < 1) {
374 cmn_err(CE_WARN, "ud_iget: v_count < 1\n");
375 mutex_exit(&ud_icache_lock);
376 rw_exit(&ip->i_contents);
377 if (pbp != NULL) {
378 brelse(pbp);
380 return (EINVAL);
382 if (vn_has_cached_data(vp)) {
383 cmn_err(CE_WARN, "ud_iget: v_object list is not NULL\n");
384 mutex_exit(&ud_icache_lock);
385 rw_exit(&ip->i_contents);
386 if (pbp != NULL) {
387 brelse(pbp);
389 return (EINVAL);
393 * Move the inode on the chain for its new (ino, dev) pair
395 remque(ip);
396 ip->i_forw = ip;
397 ip->i_back = ip;
398 insque(ip, ih);
400 ip->i_dev = vfsp->vfs_dev;
401 ip->i_udf = udf_vfsp;
402 ip->i_diroff = 0;
403 ip->i_devvp = ip->i_udf->udf_devvp;
404 ip->i_icb_prn = prn;
405 ip->i_icb_block = ploc;
406 ip->i_icb_lbano = loc;
407 ip->i_nextr = 0;
408 ip->i_seq = 0;
409 mutex_exit(&ud_icache_lock);
411 read_de:
412 if (pbp != NULL) {
414 * assumption is that we will not
415 * create a 4096 file
417 bp = pbp;
418 } else {
419 bp = ud_bread(ip->i_dev,
420 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
421 udf_vfsp->udf_lbsize);
425 * Check I/O errors
427 fe = (struct file_entry *)bp->b_un.b_addr;
428 if ((bp->b_flags & B_ERROR) ||
429 (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
430 ip->i_icb_block, 1, udf_vfsp->udf_lbsize) != 0)) {
432 if (((bp->b_flags & B_ERROR) == 0) &&
433 (ftype == STRAT_TYPE4096)) {
434 if (ud_check_te_unrec(udf_vfsp,
435 bp->b_un.b_addr, ip->i_icb_block) == 0) {
437 brelse(bp);
440 * restore old file entry location
442 ip->i_icb_prn = old_prn;
443 ip->i_icb_block = old_block;
444 ip->i_icb_lbano = old_lbano;
447 * reread old file entry
449 bp = ud_bread(ip->i_dev,
450 old_lbano << udf_vfsp->udf_l2d_shift,
451 udf_vfsp->udf_lbsize);
452 if ((bp->b_flags & B_ERROR) == 0) {
453 fe = (struct file_entry *)
454 bp->b_un.b_addr;
455 if (ud_verify_tag_and_desc(&fe->fe_tag,
456 UD_FILE_ENTRY, ip->i_icb_block, 1,
457 udf_vfsp->udf_lbsize) == 0) {
458 goto end_4096;
463 error_ret:
464 brelse(bp);
466 * The inode may not contain anything useful. Mark it as
467 * having an error and let anyone else who was waiting for
468 * this know there was an error. Callers waiting for
469 * access to this inode in ud_iget will find
470 * the i_icb_lbano == 0, so there won't be a match.
471 * It remains in the cache. Put it back on the freelist.
473 mutex_enter(&vp->v_lock);
474 VN_RELE_LOCKED(vp);
475 mutex_exit(&vp->v_lock);
476 ip->i_icb_lbano = 0;
479 * The folowing two lines make
480 * it impossible for any one do
481 * a VN_HOLD and then a VN_RELE
482 * so avoiding a ud_iinactive
484 ip->i_icb_prn = 0xffff;
485 ip->i_icb_block = 0;
488 * remove the bad inode from hash chains
489 * so that during unmount we will not
490 * go through this inode
492 mutex_enter(&ud_icache_lock);
493 remque(ip);
494 ip->i_forw = ip;
495 ip->i_back = ip;
496 mutex_exit(&ud_icache_lock);
498 /* Put the inode at the front of the freelist */
499 mutex_enter(&ip->i_tlock);
500 mutex_enter(&udf_ifree_lock);
501 ud_add_to_free_list(ip, UD_BEGIN);
502 mutex_exit(&udf_ifree_lock);
503 ip->i_flag = 0;
504 mutex_exit(&ip->i_tlock);
505 rw_exit(&ip->i_contents);
506 return (EIO);
509 if (fe->fe_icb_tag.itag_strategy == SWAP_16(STRAT_TYPE4096)) {
510 struct buf *ibp = NULL;
511 struct indirect_entry *ie;
514 * save old file_entry location
516 old_prn = ip->i_icb_prn;
517 old_block = ip->i_icb_block;
518 old_lbano = ip->i_icb_lbano;
520 ftype = STRAT_TYPE4096;
523 * If astrat is 4096 different versions
524 * of the file exist on the media.
525 * we are supposed to get to the latest
526 * version of the file
530 * IE is supposed to be in the next block
531 * of DE
533 ibp = ud_bread(ip->i_dev,
534 (ip->i_icb_lbano + 1) << udf_vfsp->udf_l2d_shift,
535 udf_vfsp->udf_lbsize);
536 if (ibp->b_flags & B_ERROR) {
538 * Get rid of current ibp and
539 * then goto error on DE's bp
541 ie_error:
542 brelse(ibp);
543 goto error_ret;
546 ie = (struct indirect_entry *)ibp->b_un.b_addr;
547 if (ud_verify_tag_and_desc(&ie->ie_tag,
548 UD_INDIRECT_ENT, ip->i_icb_block + 1,
549 1, udf_vfsp->udf_lbsize) == 0) {
550 struct long_ad *lad;
552 lad = &ie->ie_indirecticb;
553 ip->i_icb_prn = SWAP_16(lad->lad_ext_prn);
554 ip->i_icb_block = SWAP_32(lad->lad_ext_loc);
555 ip->i_icb_lbano = ud_xlate_to_daddr(udf_vfsp,
556 ip->i_icb_prn, ip->i_icb_block,
557 1, &dummy);
558 brelse(ibp);
559 brelse(bp);
560 goto read_de;
564 * If this block is TE or unrecorded we
565 * are at the last entry
567 if (ud_check_te_unrec(udf_vfsp, ibp->b_un.b_addr,
568 ip->i_icb_block + 1) != 0) {
570 * This is not an unrecorded block
571 * Check if it a valid IE and
572 * get the address of DE that
573 * this IE points to
575 goto ie_error;
578 * If ud_check_unrec returns "0"
579 * this is the last in the chain
580 * Latest file_entry
582 brelse(ibp);
585 end_4096:
587 ip->i_uid = SWAP_32(fe->fe_uid);
588 if (ip->i_uid == -1) {
589 ip->i_uid = ud_default_uid;
591 ip->i_gid = SWAP_32(fe->fe_gid);
592 if (ip->i_gid == -1) {
593 ip->i_gid = ud_default_gid;
595 ip->i_perm = SWAP_32(fe->fe_perms) & 0xFFFF;
596 if (fe->fe_icb_tag.itag_strategy == SWAP_16(STRAT_TYPE4096)) {
597 ip->i_perm &= ~(IWRITE | (IWRITE >> 5) | (IWRITE >> 10));
600 ip->i_nlink = SWAP_16(fe->fe_lcount);
601 ip->i_size = SWAP_64(fe->fe_info_len);
602 ip->i_lbr = SWAP_64(fe->fe_lbr);
604 ud_dtime2utime(&ip->i_atime, &fe->fe_acc_time);
605 ud_dtime2utime(&ip->i_mtime, &fe->fe_mod_time);
606 ud_dtime2utime(&ip->i_ctime, &fe->fe_attr_time);
609 ip->i_uniqid = SWAP_64(fe->fe_uniq_id);
610 icb_tag_flags = SWAP_16(fe->fe_icb_tag.itag_flags);
612 if ((fe->fe_icb_tag.itag_ftype == FTYPE_CHAR_DEV) ||
613 (fe->fe_icb_tag.itag_ftype == FTYPE_BLOCK_DEV)) {
615 eah = (struct ext_attr_hdr *)fe->fe_spec;
616 ea_off = GET_32(&eah->eah_ial);
617 ea_len = GET_32(&fe->fe_len_ear);
618 if (ea_len && (ud_verify_tag_and_desc(&eah->eah_tag,
619 UD_EXT_ATTR_HDR, ip->i_icb_block, 1,
620 sizeof (struct file_entry) -
621 offsetof(struct file_entry, fe_spec)) == 0)) {
623 while (ea_off < ea_len) {
625 * We now check the validity of ea_off.
626 * (ea_len - ea_off) should be large enough to
627 * hold the attribute header atleast.
629 if ((ea_len - ea_off) <
630 sizeof (struct attr_hdr)) {
631 cmn_err(CE_NOTE,
632 "ea_len(0x%x) - ea_off(0x%x) is "
633 "too small to hold attr. info. "
634 "blockno 0x%x\n",
635 ea_len, ea_off, ip->i_icb_block);
636 goto error_ret;
638 ah = (struct attr_hdr *)&fe->fe_spec[ea_off];
641 * Device Specification EA
643 if ((GET_32(&ah->ahdr_atype) == 12) &&
644 (ah->ahdr_astype == 1)) {
645 struct dev_spec_ear *ds;
647 if ((ea_len - ea_off) <
648 sizeof (struct dev_spec_ear)) {
649 cmn_err(CE_NOTE,
650 "ea_len(0x%x) - "
651 "ea_off(0x%x) is too small "
652 "to hold dev_spec_ear."
653 " blockno 0x%x\n",
654 ea_len, ea_off,
655 ip->i_icb_block);
656 goto error_ret;
658 ds = (struct dev_spec_ear *)ah;
659 ip->i_major = GET_32(&ds->ds_major_id);
660 ip->i_minor = GET_32(&ds->ds_minor_id);
664 * Impl Use EA
666 if ((GET_32(&ah->ahdr_atype) == 2048) &&
667 (ah->ahdr_astype == 1)) {
668 struct iu_ea *iuea;
669 struct copy_mgt_info *cmi;
671 if ((ea_len - ea_off) <
672 sizeof (struct iu_ea)) {
673 cmn_err(CE_NOTE,
674 "ea_len(0x%x) - ea_off(0x%x) is too small to hold iu_ea. blockno 0x%x\n",
675 ea_len, ea_off,
676 ip->i_icb_block);
677 goto error_ret;
679 iuea = (struct iu_ea *)ah;
680 if (strncmp(iuea->iuea_ii.reg_id,
681 UDF_FREEEASPACE,
682 sizeof (iuea->iuea_ii.reg_id))
683 == 0) {
684 /* skip it */
685 iuea = iuea;
686 } else if (strncmp(iuea->iuea_ii.reg_id,
687 UDF_CGMS_INFO,
688 sizeof (iuea->iuea_ii.reg_id))
689 == 0) {
690 cmi = (struct copy_mgt_info *)
691 iuea->iuea_iu;
692 cmi = cmi;
695 /* ??? PARANOIA */
696 if (GET_32(&ah->ahdr_length) == 0) {
697 break;
699 ea_off += GET_32(&ah->ahdr_length);
704 ip->i_nextr = 0;
706 ip->i_maxent = SWAP_16(fe->fe_icb_tag.itag_max_ent);
707 ip->i_astrat = SWAP_16(fe->fe_icb_tag.itag_strategy);
709 ip->i_desc_type = icb_tag_flags & 0x7;
711 /* Strictly Paranoia */
712 ip->i_ext = NULL;
713 ip->i_ext_count = ip->i_ext_used = 0;
714 ip->i_con = 0;
715 ip->i_con_count = ip->i_con_used = ip->i_con_read = 0;
717 ip->i_data_off = 0xB0 + SWAP_32(fe->fe_len_ear);
718 ip->i_max_emb = udf_vfsp->udf_lbsize - ip->i_data_off;
719 if (ip->i_desc_type == ICB_FLAG_SHORT_AD) {
720 /* Short allocation desc */
721 struct short_ad *sad;
723 ip->i_ext_used = 0;
724 ip->i_ext_count = ndesc =
725 SWAP_32(fe->fe_len_adesc) / sizeof (struct short_ad);
726 ip->i_ext_count =
727 ((ip->i_ext_count / EXT_PER_MALLOC) + 1) * EXT_PER_MALLOC;
728 ip->i_ext = kmem_zalloc(ip->i_ext_count *
729 sizeof (struct icb_ext), KM_SLEEP);
730 ip->i_cur_max_ext = ip->i_max_emb / sizeof (struct short_ad);
731 ip->i_cur_max_ext --;
733 if ((ip->i_astrat != STRAT_TYPE4) &&
734 (ip->i_astrat != STRAT_TYPE4096)) {
735 goto error_ret;
738 sad = (struct short_ad *)
739 (fe->fe_spec + SWAP_32(fe->fe_len_ear));
740 iext = ip->i_ext;
741 while (ndesc --) {
742 length = SWAP_32(sad->sad_ext_len);
743 if ((length & 0x3FFFFFFF) == 0) {
744 break;
746 if (((length >> 30) & IB_MASK) == IB_CON) {
747 if (ip->i_con == NULL) {
748 ip->i_con_count = EXT_PER_MALLOC;
749 ip->i_con_used = 0;
750 ip->i_con_read = 0;
751 ip->i_con = kmem_zalloc(
752 ip->i_con_count *
753 sizeof (struct icb_ext),
754 KM_SLEEP);
756 con = &ip->i_con[ip->i_con_used];
757 con->ib_prn = 0;
758 con->ib_block = SWAP_32(sad->sad_ext_loc);
759 con->ib_count = length & 0x3FFFFFFF;
760 con->ib_flags = (length >> 30) & IB_MASK;
761 ip->i_con_used++;
762 sad ++;
763 break;
765 iext->ib_prn = 0;
766 iext->ib_block = SWAP_32(sad->sad_ext_loc);
767 length = SWAP_32(sad->sad_ext_len);
768 iext->ib_count = length & 0x3FFFFFFF;
769 iext->ib_offset = offset;
770 iext->ib_marker1 = (uint32_t)0xAAAAAAAA;
771 iext->ib_marker2 = (uint32_t)0xBBBBBBBB;
772 offset += (iext->ib_count + udf_vfsp->udf_lbmask) &
773 (~udf_vfsp->udf_lbmask);
775 iext->ib_flags = (length >> 30) & IB_MASK;
777 ip->i_ext_used++;
778 iext++;
779 sad ++;
781 } else if (ip->i_desc_type == ICB_FLAG_LONG_AD) {
782 /* Long allocation desc */
783 struct long_ad *lad;
785 ip->i_ext_used = 0;
786 ip->i_ext_count = ndesc =
787 SWAP_32(fe->fe_len_adesc) / sizeof (struct long_ad);
788 ip->i_ext_count =
789 ((ip->i_ext_count / EXT_PER_MALLOC) + 1) * EXT_PER_MALLOC;
790 ip->i_ext = kmem_zalloc(ip->i_ext_count *
791 sizeof (struct icb_ext), KM_SLEEP);
793 ip->i_cur_max_ext = ip->i_max_emb / sizeof (struct long_ad);
794 ip->i_cur_max_ext --;
796 if ((ip->i_astrat != STRAT_TYPE4) &&
797 (ip->i_astrat != STRAT_TYPE4096)) {
798 goto error_ret;
801 lad = (struct long_ad *)
802 (fe->fe_spec + SWAP_32(fe->fe_len_ear));
803 iext = ip->i_ext;
804 while (ndesc --) {
805 length = SWAP_32(lad->lad_ext_len);
806 if ((length & 0x3FFFFFFF) == 0) {
807 break;
809 if (((length >> 30) & IB_MASK) == IB_CON) {
810 if (ip->i_con == NULL) {
811 ip->i_con_count = EXT_PER_MALLOC;
812 ip->i_con_used = 0;
813 ip->i_con_read = 0;
814 ip->i_con = kmem_zalloc(
815 ip->i_con_count *
816 sizeof (struct icb_ext),
817 KM_SLEEP);
819 con = &ip->i_con[ip->i_con_used];
820 con->ib_prn = SWAP_16(lad->lad_ext_prn);
821 con->ib_block = SWAP_32(lad->lad_ext_loc);
822 con->ib_count = length & 0x3FFFFFFF;
823 con->ib_flags = (length >> 30) & IB_MASK;
824 ip->i_con_used++;
825 lad ++;
826 break;
828 iext->ib_prn = SWAP_16(lad->lad_ext_prn);
829 iext->ib_block = SWAP_32(lad->lad_ext_loc);
830 iext->ib_count = length & 0x3FFFFFFF;
831 iext->ib_offset = offset;
832 iext->ib_marker1 = (uint32_t)0xAAAAAAAA;
833 iext->ib_marker2 = (uint32_t)0xBBBBBBBB;
834 offset += (iext->ib_count + udf_vfsp->udf_lbmask) &
835 (~udf_vfsp->udf_lbmask);
837 iext->ib_flags = (length >> 30) & IB_MASK;
839 ip->i_ext_used++;
840 iext++;
841 lad ++;
843 } else if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
844 ASSERT(SWAP_32(fe->fe_len_ear) < udf_vfsp->udf_lbsize);
846 if (SWAP_32(fe->fe_len_ear) > udf_vfsp->udf_lbsize) {
847 goto error_ret;
849 } else {
850 /* Not to be used in UDF 1.50 */
851 cmn_err(CE_NOTE, "Invalid Allocation Descriptor type %x\n",
852 ip->i_desc_type);
853 goto error_ret;
857 if (icb_tag_flags & ICB_FLAG_SETUID) {
858 ip->i_char = ISUID;
859 } else {
860 ip->i_char = 0;
862 if (icb_tag_flags & ICB_FLAG_SETGID) {
863 ip->i_char |= ISGID;
865 if (icb_tag_flags & ICB_FLAG_STICKY) {
866 ip->i_char |= ISVTX;
868 switch (fe->fe_icb_tag.itag_ftype) {
869 case FTYPE_DIRECTORY :
870 ip->i_type = VDIR;
871 break;
872 case FTYPE_FILE :
873 ip->i_type = VREG;
874 break;
875 case FTYPE_BLOCK_DEV :
876 ip->i_type = VBLK;
877 break;
878 case FTYPE_CHAR_DEV :
879 ip->i_type = VCHR;
880 break;
881 case FTYPE_FIFO :
882 ip->i_type = VFIFO;
883 break;
884 case FTYPE_C_ISSOCK :
885 ip->i_type = VSOCK;
886 break;
887 case FTYPE_SYMLINK :
888 ip->i_type = VLNK;
889 break;
890 default :
891 ip->i_type = VNON;
892 break;
895 if (ip->i_type == VBLK || ip->i_type == VCHR) {
896 ip->i_rdev = makedevice(ip->i_major, ip->i_minor);
900 * Fill in the rest. Don't bother with the vnode lock because nobody
901 * should be looking at this vnode. We have already invalidated the
902 * pages if it had any so pageout shouldn't be referencing this vnode
903 * and we are holding the write contents lock so a look up can't use
904 * the vnode.
906 vp->v_vfsp = vfsp;
907 vp->v_type = ip->i_type;
908 vp->v_rdev = ip->i_rdev;
909 if (ip->i_udf->udf_root_blkno == loc) {
910 vp->v_flag = VROOT;
911 } else {
912 vp->v_flag = 0;
915 brelse(bp);
916 *ipp = ip;
917 rw_exit(&ip->i_contents);
918 vn_exists(vp);
919 return (0);
922 void
923 ud_iinactive(struct ud_inode *ip, struct cred *cr)
925 int32_t busy = 0;
926 struct vnode *vp;
927 vtype_t type;
928 caddr_t addr, addr1;
929 size_t size, size1;
932 ud_printf("ud_iinactive\n");
935 * Get exclusive access to inode data.
937 rw_enter(&ip->i_contents, RW_WRITER);
940 * Make sure no one reclaimed the inode before we put
941 * it on the freelist or destroy it. We keep our 'hold'
942 * on the vnode from vn_rele until we are ready to
943 * do something with the inode (freelist/destroy).
945 * Pageout may put a VN_HOLD/VN_RELE at anytime during this
946 * operation via an async putpage, so we must make sure
947 * we don't free/destroy the inode more than once. ud_iget
948 * may also put a VN_HOLD on the inode before it grabs
949 * the i_contents lock. This is done so we don't kmem_free
950 * an inode that a thread is waiting on.
952 vp = ITOV(ip);
954 mutex_enter(&vp->v_lock);
955 if (vp->v_count < 1) {
956 cmn_err(CE_WARN, "ud_iinactive: v_count < 1\n");
957 return;
959 if ((vp->v_count > 1) || ((ip->i_flag & IREF) == 0)) {
960 VN_RELE_LOCKED(vp);
961 mutex_exit(&vp->v_lock);
962 rw_exit(&ip->i_contents);
963 return;
965 mutex_exit(&vp->v_lock);
968 * For forced umount case: if i_udf is NULL, the contents of
969 * the inode and all the pages have already been pushed back
970 * to disk. It can be safely destroyed.
972 if (ip->i_udf == NULL) {
973 addr = (caddr_t)ip->i_ext;
974 size = sizeof (struct icb_ext) * ip->i_ext_count;
975 ip->i_ext = 0;
976 ip->i_ext_count = ip->i_ext_used = 0;
977 addr1 = (caddr_t)ip->i_con;
978 size1 = sizeof (struct icb_ext) * ip->i_con_count;
979 ip->i_con = 0;
980 ip->i_con_count = ip->i_con_used = ip->i_con_read = 0;
981 rw_exit(&ip->i_contents);
982 vn_invalid(vp);
984 mutex_enter(&ud_nino_lock);
985 ud_cur_inodes--;
986 mutex_exit(&ud_nino_lock);
988 cv_destroy(&ip->i_wrcv); /* throttling */
989 rw_destroy(&ip->i_rwlock);
990 rw_exit(&ip->i_contents);
991 rw_destroy(&ip->i_contents);
992 kmem_free(addr, size);
993 kmem_free(addr1, size1);
994 vn_free(vp);
995 kmem_free(ip, sizeof (struct ud_inode));
996 return;
999 if ((ip->i_udf->udf_flags & UDF_FL_RDONLY) == 0) {
1000 if (ip->i_nlink <= 0) {
1001 ip->i_marker3 = (uint32_t)0xDDDD0000;
1002 ip->i_nlink = 1; /* prevent free-ing twice */
1003 (void) ud_itrunc(ip, 0, 0, cr);
1004 type = ip->i_type;
1005 ip->i_perm = 0;
1006 ip->i_uid = 0;
1007 ip->i_gid = 0;
1008 ip->i_rdev = 0; /* Zero in core version of rdev */
1009 mutex_enter(&ip->i_tlock);
1010 ip->i_flag |= IUPD|ICHG;
1011 mutex_exit(&ip->i_tlock);
1012 ud_ifree(ip, type);
1013 ip->i_icb_prn = 0xFFFF;
1014 } else if (!IS_SWAPVP(vp)) {
1016 * Write the inode out if dirty. Pages are
1017 * written back and put on the freelist.
1019 (void) ud_syncip(ip, B_FREE | B_ASYNC, 0);
1021 * Do nothing if inode is now busy -- inode may
1022 * have gone busy because ud_syncip
1023 * releases/reacquires the i_contents lock
1025 mutex_enter(&vp->v_lock);
1026 if (vp->v_count > 1) {
1027 VN_RELE_LOCKED(vp);
1028 mutex_exit(&vp->v_lock);
1029 rw_exit(&ip->i_contents);
1030 return;
1032 mutex_exit(&vp->v_lock);
1033 } else {
1034 ud_iupdat(ip, 0);
1040 * Put the inode on the end of the free list.
1041 * Possibly in some cases it would be better to
1042 * put the inode at the head of the free list,
1043 * (e.g.: where i_perm == 0 || i_number == 0)
1044 * but I will think about that later.
1045 * (i_number is rarely 0 - only after an i/o error in ud_iget,
1046 * where i_perm == 0, the inode will probably be wanted
1047 * again soon for an ialloc, so possibly we should keep it)
1050 * If inode is invalid or there is no page associated with
1051 * this inode, put the inode in the front of the free list.
1052 * Since we have a VN_HOLD on the vnode, and checked that it
1053 * wasn't already on the freelist when we entered, we can safely
1054 * put it on the freelist even if another thread puts a VN_HOLD
1055 * on it (pageout/ud_iget).
1057 tryagain:
1058 mutex_enter(&ud_nino_lock);
1059 if (vn_has_cached_data(vp)) {
1060 mutex_exit(&ud_nino_lock);
1061 mutex_enter(&vp->v_lock);
1062 VN_RELE_LOCKED(vp);
1063 mutex_exit(&vp->v_lock);
1064 mutex_enter(&ip->i_tlock);
1065 mutex_enter(&udf_ifree_lock);
1066 ud_add_to_free_list(ip, UD_END);
1067 mutex_exit(&udf_ifree_lock);
1068 ip->i_flag &= IMODTIME;
1069 mutex_exit(&ip->i_tlock);
1070 rw_exit(&ip->i_contents);
1071 } else if (busy || ud_cur_inodes < ud_max_inodes) {
1072 mutex_exit(&ud_nino_lock);
1074 * We're not over our high water mark, or it's
1075 * not safe to kmem_free the inode, so put it
1076 * on the freelist.
1078 mutex_enter(&vp->v_lock);
1079 if (vn_has_cached_data(vp)) {
1080 cmn_err(CE_WARN, "ud_iinactive: v_object list is "
1081 "not NULL\n");
1083 VN_RELE_LOCKED(vp);
1084 mutex_exit(&vp->v_lock);
1086 mutex_enter(&ip->i_tlock);
1087 mutex_enter(&udf_ifree_lock);
1088 ud_add_to_free_list(ip, UD_BEGIN);
1089 mutex_exit(&udf_ifree_lock);
1090 ip->i_flag &= IMODTIME;
1091 mutex_exit(&ip->i_tlock);
1092 rw_exit(&ip->i_contents);
1093 } else {
1094 mutex_exit(&ud_nino_lock);
1095 if (vn_has_cached_data(vp)) {
1096 cmn_err(CE_WARN, "ud_iinactive: v_object list is "
1097 "not NULL\n");
1100 * Try to free the inode. We must make sure
1101 * it's o.k. to destroy this inode. We can't destroy
1102 * if a thread is waiting for this inode. If we can't get the
1103 * cache now, put it back on the freelist.
1105 if (!mutex_tryenter(&ud_icache_lock)) {
1106 busy = 1;
1107 goto tryagain;
1109 mutex_enter(&vp->v_lock);
1110 if (vp->v_count > 1) {
1111 /* inode is wanted in ud_iget */
1112 busy = 1;
1113 mutex_exit(&vp->v_lock);
1114 mutex_exit(&ud_icache_lock);
1115 goto tryagain;
1117 mutex_exit(&vp->v_lock);
1118 remque(ip);
1119 ip->i_forw = ip;
1120 ip->i_back = ip;
1121 mutex_enter(&ud_nino_lock);
1122 ud_cur_inodes--;
1123 mutex_exit(&ud_nino_lock);
1124 mutex_exit(&ud_icache_lock);
1125 if (ip->i_icb_prn != 0xFFFF) {
1126 ud_iupdat(ip, 0);
1128 addr = (caddr_t)ip->i_ext;
1129 size = sizeof (struct icb_ext) * ip->i_ext_count;
1130 ip->i_ext = 0;
1131 ip->i_ext_count = ip->i_ext_used = 0;
1132 addr1 = (caddr_t)ip->i_con;
1133 size1 = sizeof (struct icb_ext) * ip->i_con_count;
1134 ip->i_con = 0;
1135 ip->i_con_count = ip->i_con_used = ip->i_con_read = 0;
1136 cv_destroy(&ip->i_wrcv); /* throttling */
1137 rw_destroy(&ip->i_rwlock);
1138 rw_exit(&ip->i_contents);
1139 rw_destroy(&ip->i_contents);
1140 kmem_free(addr, size);
1141 kmem_free(addr1, size1);
1142 ip->i_marker3 = (uint32_t)0xDDDDDDDD;
1143 vn_free(vp);
1144 kmem_free(ip, sizeof (struct ud_inode));
1149 void
1150 ud_iupdat(struct ud_inode *ip, int32_t waitfor)
1152 uint16_t flag, tag_flags;
1153 int32_t error;
1154 struct buf *bp;
1155 struct udf_vfs *udf_vfsp;
1156 struct file_entry *fe;
1157 uint16_t crc_len = 0;
1159 ASSERT(RW_WRITE_HELD(&ip->i_contents));
1161 ud_printf("ud_iupdat\n");
1163 * Return if file system has been forcibly umounted.
1165 if (ip->i_udf == NULL) {
1166 return;
1169 udf_vfsp = ip->i_udf;
1170 flag = ip->i_flag; /* Atomic read */
1171 if ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC)) != 0) {
1172 if (udf_vfsp->udf_flags & UDF_FL_RDONLY) {
1173 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG);
1174 return;
1177 bp = ud_bread(ip->i_dev,
1178 ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
1179 ip->i_udf->udf_lbsize);
1180 if (bp->b_flags & B_ERROR) {
1181 brelse(bp);
1182 return;
1184 fe = (struct file_entry *)bp->b_un.b_addr;
1185 if (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
1186 ip->i_icb_block,
1187 1, ip->i_udf->udf_lbsize) != 0) {
1188 brelse(bp);
1189 return;
1192 mutex_enter(&ip->i_tlock);
1193 if (ip->i_flag & (IUPD|IACC|ICHG)) {
1194 IMARK(ip);
1196 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD|IMODACC);
1197 mutex_exit(&ip->i_tlock);
1199 fe->fe_uid = SWAP_32(ip->i_uid);
1200 fe->fe_gid = SWAP_32(ip->i_gid);
1202 fe->fe_perms = SWAP_32(ip->i_perm);
1204 fe->fe_lcount = SWAP_16(ip->i_nlink);
1205 fe->fe_info_len = SWAP_64(ip->i_size);
1206 fe->fe_lbr = SWAP_64(ip->i_lbr);
1208 ud_utime2dtime(&ip->i_atime, &fe->fe_acc_time);
1209 ud_utime2dtime(&ip->i_mtime, &fe->fe_mod_time);
1210 ud_utime2dtime(&ip->i_ctime, &fe->fe_attr_time);
1212 if (ip->i_char & ISUID) {
1213 tag_flags = ICB_FLAG_SETUID;
1214 } else {
1215 tag_flags = 0;
1217 if (ip->i_char & ISGID) {
1218 tag_flags |= ICB_FLAG_SETGID;
1220 if (ip->i_char & ISVTX) {
1221 tag_flags |= ICB_FLAG_STICKY;
1223 tag_flags |= ip->i_desc_type;
1226 * Remove the following it is no longer contig
1227 * if (ip->i_astrat == STRAT_TYPE4) {
1228 * tag_flags |= ICB_FLAG_CONTIG;
1232 fe->fe_icb_tag.itag_flags &= ~SWAP_16((uint16_t)0x3C3);
1233 fe->fe_icb_tag.itag_strategy = SWAP_16(ip->i_astrat);
1234 fe->fe_icb_tag.itag_flags |= SWAP_16(tag_flags);
1236 ud_update_regid(&fe->fe_impl_id);
1238 crc_len = offsetof(struct file_entry, fe_spec) +
1239 SWAP_32(fe->fe_len_ear);
1240 if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
1241 crc_len += ip->i_size;
1242 fe->fe_len_adesc = SWAP_32(((uint32_t)ip->i_size));
1243 } else if ((ip->i_size != 0) && (ip->i_ext != NULL) &&
1244 (ip->i_ext_used != 0)) {
1246 if ((error = ud_read_icb_till_off(ip,
1247 ip->i_size)) == 0) {
1248 if (ip->i_astrat == STRAT_TYPE4) {
1249 error = ud_updat_ext4(ip, fe);
1250 } else if (ip->i_astrat == STRAT_TYPE4096) {
1251 error = ud_updat_ext4096(ip, fe);
1253 if (error) {
1254 udf_vfsp->udf_mark_bad = 1;
1257 crc_len += SWAP_32(fe->fe_len_adesc);
1258 } else {
1259 fe->fe_len_adesc = 0;
1263 * Zero out the rest of the block
1265 bzero(bp->b_un.b_addr + crc_len,
1266 ip->i_udf->udf_lbsize - crc_len);
1268 ud_make_tag(ip->i_udf, &fe->fe_tag,
1269 UD_FILE_ENTRY, ip->i_icb_block, crc_len);
1272 if (waitfor) {
1273 BWRITE(bp);
1276 * Synchronous write has guaranteed that inode
1277 * has been written on disk so clear the flag
1279 ip->i_flag &= ~(IBDWRITE);
1280 } else {
1281 bdwrite(bp);
1284 * This write hasn't guaranteed that inode has been
1285 * written on the disk.
1286 * Since, all updat flags on indoe are cleared, we must
1287 * remember the condition in case inode is to be updated
1288 * synchronously later (e.g.- fsync()/fdatasync())
1289 * and inode has not been modified yet.
1291 ip->i_flag |= (IBDWRITE);
1293 } else {
1295 * In case previous inode update was done asynchronously
1296 * (IBDWRITE) and this inode update request wants guaranteed
1297 * (synchronous) disk update, flush the inode.
1299 if (waitfor && (flag & IBDWRITE)) {
1300 blkflush(ip->i_dev,
1301 (daddr_t)fsbtodb(udf_vfsp, ip->i_icb_lbano));
1302 ip->i_flag &= ~(IBDWRITE);
1307 int32_t
1308 ud_updat_ext4(struct ud_inode *ip, struct file_entry *fe)
1310 uint32_t dummy;
1311 int32_t elen, ndent, index, count, con_index;
1312 daddr_t bno;
1313 struct buf *bp;
1314 struct short_ad *sad;
1315 struct long_ad *lad;
1316 struct icb_ext *iext, *icon;
1319 ASSERT(ip);
1320 ASSERT(fe);
1321 ASSERT((ip->i_desc_type == ICB_FLAG_SHORT_AD) ||
1322 (ip->i_desc_type == ICB_FLAG_LONG_AD));
1324 if (ip->i_desc_type == ICB_FLAG_SHORT_AD) {
1325 elen = sizeof (struct short_ad);
1326 sad = (struct short_ad *)
1327 (fe->fe_spec + SWAP_32(fe->fe_len_ear));
1328 } else if (ip->i_desc_type == ICB_FLAG_LONG_AD) {
1329 elen = sizeof (struct long_ad);
1330 lad = (struct long_ad *)
1331 (fe->fe_spec + SWAP_32(fe->fe_len_ear));
1332 } else {
1333 /* This cannot happen return */
1334 return (EINVAL);
1337 ndent = ip->i_max_emb / elen;
1339 if (ip->i_ext_used < ndent) {
1341 if (ip->i_desc_type == ICB_FLAG_SHORT_AD) {
1342 ud_make_sad(ip->i_ext, sad, ip->i_ext_used);
1343 } else {
1344 ud_make_lad(ip->i_ext, lad, ip->i_ext_used);
1346 fe->fe_len_adesc = SWAP_32(ip->i_ext_used * elen);
1347 con_index = 0;
1348 } else {
1350 con_index = index = 0;
1352 while (index < ip->i_ext_used) {
1353 if (index == 0) {
1355 * bp is already read
1356 * First few extents will go
1357 * into the file_entry
1359 count = ndent - 1;
1360 fe->fe_len_adesc = SWAP_32(ndent * elen);
1361 bp = NULL;
1364 * Last entry to be cont ext
1366 icon = &ip->i_con[con_index];
1367 } else {
1369 * Read the buffer
1371 icon = &ip->i_con[con_index];
1373 bno = ud_xlate_to_daddr(ip->i_udf,
1374 icon->ib_prn, icon->ib_block,
1375 icon->ib_count >> ip->i_udf->udf_l2d_shift,
1376 &dummy);
1377 bp = ud_bread(ip->i_dev,
1378 bno << ip->i_udf->udf_l2d_shift,
1379 ip->i_udf->udf_lbsize);
1380 if (bp->b_flags & B_ERROR) {
1381 brelse(bp);
1382 return (EIO);
1386 * Figure out how many extents in
1387 * this time
1389 count = (bp->b_bcount -
1390 sizeof (struct alloc_ext_desc)) / elen;
1391 if (count > (ip->i_ext_used - index)) {
1392 count = ip->i_ext_used - index;
1393 } else {
1394 count --;
1396 con_index++;
1397 if (con_index >= ip->i_con_used) {
1398 icon = NULL;
1399 } else {
1400 icon = &ip->i_con[con_index];
1407 * convert to on disk form and
1408 * update
1410 iext = &ip->i_ext[index];
1411 if (ip->i_desc_type == ICB_FLAG_SHORT_AD) {
1412 if (index != 0) {
1413 sad = (struct short_ad *)
1414 (bp->b_un.b_addr +
1415 sizeof (struct alloc_ext_desc));
1417 ud_make_sad(iext, sad, count);
1418 sad += count;
1419 if (icon != NULL) {
1420 ud_make_sad(icon, sad, 1);
1422 } else {
1423 if (index != 0) {
1424 lad = (struct long_ad *)
1425 (bp->b_un.b_addr +
1426 sizeof (struct alloc_ext_desc));
1428 ud_make_lad(iext, lad, count);
1429 lad += count;
1430 if (icon != NULL) {
1431 ud_make_lad(icon, lad, 1);
1435 if (con_index != 0) {
1436 struct alloc_ext_desc *aed;
1437 int32_t sz;
1438 struct icb_ext *oicon;
1440 oicon = &ip->i_con[con_index - 1];
1441 sz = count * elen;
1442 if (icon != NULL) {
1443 sz += elen;
1445 aed = (struct alloc_ext_desc *)bp->b_un.b_addr;
1446 aed->aed_len_aed = SWAP_32(sz);
1447 if (con_index == 1) {
1448 aed->aed_rev_ael =
1449 SWAP_32(ip->i_icb_block);
1450 } else {
1451 aed->aed_rev_ael =
1452 SWAP_32(oicon->ib_block);
1454 sz += sizeof (struct alloc_ext_desc);
1455 ud_make_tag(ip->i_udf, &aed->aed_tag,
1456 UD_ALLOC_EXT_DESC, oicon->ib_block, sz);
1460 * Write back to disk
1462 if (bp != NULL) {
1463 BWRITE(bp);
1465 index += count;
1470 if (con_index != ip->i_con_used) {
1471 int32_t lbmask, l2b, temp;
1473 temp = con_index;
1474 lbmask = ip->i_udf->udf_lbmask;
1475 l2b = ip->i_udf->udf_l2b_shift;
1477 * Free unused continuation extents
1479 for (; con_index < ip->i_con_used; con_index++) {
1480 icon = &ip->i_con[con_index];
1481 count = (icon->ib_count + lbmask) >> l2b;
1482 ud_free_space(ip->i_udf->udf_vfs, icon->ib_prn,
1483 icon->ib_block, count);
1484 count = (count << l2b) - sizeof (struct alloc_ext_desc);
1485 ip->i_cur_max_ext -= (count / elen) - 1;
1487 ip->i_con_used = temp;
1489 return (0);
/*
 * Extent update for strategy type 4096 inodes.
 * Nothing is written; the function always reports ENXIO.
 */
/* ARGSUSED */
int32_t
ud_updat_ext4096(struct ud_inode *ip, struct file_entry *fe)
{
	const int32_t unsupported = ENXIO;

	return (unsupported);
}
1499 void
1500 ud_make_sad(struct icb_ext *iext, struct short_ad *sad, int32_t count)
1502 int32_t index = 0, scount;
1504 ASSERT(iext);
1505 ASSERT(sad);
1507 if (count != 0) {
1508 ASSERT(count > 0);
1509 while (index < count) {
1510 scount = (iext->ib_count & 0x3FFFFFFF) |
1511 (iext->ib_flags << 30);
1512 sad->sad_ext_len = SWAP_32(scount);
1513 sad->sad_ext_loc = SWAP_32(iext->ib_block);
1514 sad++;
1515 iext++;
1516 index++;
1521 void
1522 ud_make_lad(struct icb_ext *iext, struct long_ad *lad, int32_t count)
1524 int32_t index = 0, scount;
1526 ASSERT(iext);
1527 ASSERT(lad);
1529 if (count != 0) {
1530 ASSERT(count > 0);
1532 while (index < count) {
1533 lad->lad_ext_prn = SWAP_16(iext->ib_prn);
1534 scount = (iext->ib_count & 0x3FFFFFFF) |
1535 (iext->ib_flags << 30);
1536 lad->lad_ext_len = SWAP_32(scount);
1537 lad->lad_ext_loc = SWAP_32(iext->ib_block);
1538 lad++;
1539 iext++;
1540 index++;
1546 * Truncate the inode ip to at most length size.
1547 * Free affected disk blocks -- the blocks of the
1548 * file are removed in reverse order.
1550 /* ARGSUSED */
1552 ud_itrunc(struct ud_inode *oip, uoff_t length,
1553 int32_t flags, struct cred *cr)
1555 int32_t error, boff;
1556 off_t bsize;
1557 mode_t mode;
1558 struct udf_vfs *udf_vfsp;
1560 ud_printf("ud_itrunc\n");
1562 ASSERT(RW_WRITE_HELD(&oip->i_contents));
1563 udf_vfsp = oip->i_udf;
1564 bsize = udf_vfsp->udf_lbsize;
1567 * We only allow truncation of regular files and directories
1568 * to arbritary lengths here. In addition, we allow symbolic
1569 * links to be truncated only to zero length. Other inode
1570 * types cannot have their length set here.
1572 mode = oip->i_type;
1573 if (mode == VFIFO) {
1574 return (0);
1576 if ((mode != VREG) && (mode != VDIR) &&
1577 (!(mode == VLNK && length == 0))) {
1578 return (EINVAL);
1580 if (length == oip->i_size) {
1581 /* update ctime and mtime to please POSIX tests */
1582 mutex_enter(&oip->i_tlock);
1583 oip->i_flag |= ICHG |IUPD;
1584 mutex_exit(&oip->i_tlock);
1585 return (0);
1588 boff = blkoff(udf_vfsp, length);
1590 if (length > oip->i_size) {
1592 * Trunc up case.ud_bmap_write will insure that the right blocks
1593 * are allocated. This includes doing any work needed for
1594 * allocating the last block.
1596 if (boff == 0) {
1597 error = ud_bmap_write(oip, length - 1,
1598 (int)bsize, 0, cr);
1599 } else {
1600 error = ud_bmap_write(oip, length - 1, boff, 0, cr);
1602 if (error == 0) {
1603 uoff_t osize = oip->i_size;
1604 oip->i_size = length;
1607 * Make sure we zero out the remaining bytes of
1608 * the page in case a mmap scribbled on it. We
1609 * can't prevent a mmap from writing beyond EOF
1610 * on the last page of a file.
1612 if ((boff = blkoff(udf_vfsp, osize)) != 0) {
1613 pvn_vpzero(ITOV(oip), osize,
1614 (uint32_t)(bsize - boff));
1616 mutex_enter(&oip->i_tlock);
1617 oip->i_flag |= ICHG;
1618 ITIMES_NOLOCK(oip);
1619 mutex_exit(&oip->i_tlock);
1621 return (error);
1625 * Update the pages of the file. If the file is not being
1626 * truncated to a block boundary, the contents of the
1627 * pages following the end of the file must be zero'ed
1628 * in case it ever become accessable again because
1629 * of subsequent file growth.
1631 if (boff == 0) {
1632 (void) pvn_vplist_dirty(ITOV(oip), length,
1633 ud_putapage, B_INVAL | B_TRUNC, CRED());
1634 } else {
1636 * Make sure that the last block is properly allocated.
1637 * We only really have to do this if the last block is
1638 * actually allocated. Just to be sure, we do it now
1639 * independent of current allocation.
1641 error = ud_bmap_write(oip, length - 1, boff, 0, cr);
1642 if (error) {
1643 return (error);
1646 pvn_vpzero(ITOV(oip), length, (uint32_t)(bsize - boff));
1648 (void) pvn_vplist_dirty(ITOV(oip), length,
1649 ud_putapage, B_INVAL | B_TRUNC, CRED());
1653 /* Free the blocks */
1654 if (oip->i_desc_type == ICB_FLAG_ONE_AD) {
1655 if (length > oip->i_max_emb) {
1656 return (EFBIG);
1658 oip->i_size = length;
1659 mutex_enter(&oip->i_tlock);
1660 oip->i_flag |= ICHG|IUPD;
1661 mutex_exit(&oip->i_tlock);
1662 ud_iupdat(oip, 1);
1663 } else {
1664 if ((error = ud_read_icb_till_off(oip, oip->i_size)) != 0) {
1665 return (error);
1668 if (oip->i_astrat == STRAT_TYPE4) {
1669 ud_trunc_ext4(oip, length);
1670 } else if (oip->i_astrat == STRAT_TYPE4096) {
1671 ud_trunc_ext4096(oip, length);
1675 done:
1676 return (0);
1679 void
1680 ud_trunc_ext4(struct ud_inode *ip, uoff_t length)
1682 int32_t index, l2b, count, ecount;
1683 int32_t elen, ndent, nient;
1684 uoff_t ext_beg, ext_end;
1685 struct icb_ext *iext, *icon;
1686 int32_t lbmask, ext_used;
1687 uint32_t loc;
1688 struct icb_ext text;
1689 uint32_t con_freed;
1691 ASSERT((ip->i_desc_type == ICB_FLAG_SHORT_AD) ||
1692 (ip->i_desc_type == ICB_FLAG_LONG_AD));
1694 if (ip->i_ext_used == 0) {
1695 return;
1698 ext_used = ip->i_ext_used;
1700 lbmask = ip->i_udf->udf_lbmask;
1701 l2b = ip->i_udf->udf_l2b_shift;
1703 ASSERT(ip->i_ext);
1705 ip->i_lbr = 0;
1706 for (index = 0; index < ext_used; index++) {
1707 iext = &ip->i_ext[index];
1710 * Find the begining and end
1711 * of current extent
1713 ext_beg = iext->ib_offset;
1714 ext_end = iext->ib_offset +
1715 ((iext->ib_count + lbmask) & ~lbmask);
1718 * This is the extent that has offset "length"
1719 * make a copy of this extent and
1720 * remember the index. We can use
1721 * it to free blocks
1723 if ((length <= ext_end) && (length >= ext_beg)) {
1724 text = *iext;
1726 iext->ib_count = length - ext_beg;
1727 ip->i_ext_used = index + 1;
1728 break;
1730 if (iext->ib_flags != IB_UN_RE_AL) {
1731 ip->i_lbr += iext->ib_count >> l2b;
1734 if (ip->i_ext_used != index) {
1735 if (iext->ib_flags != IB_UN_RE_AL) {
1736 ip->i_lbr +=
1737 ((iext->ib_count + lbmask) & ~lbmask) >> l2b;
1741 ip->i_size = length;
1742 mutex_enter(&ip->i_tlock);
1743 ip->i_flag |= ICHG|IUPD;
1744 mutex_exit(&ip->i_tlock);
1745 ud_iupdat(ip, 1);
1748 * Free the unused space
1750 if (text.ib_flags != IB_UN_RE_AL) {
1751 count = (ext_end - length) >> l2b;
1752 if (count) {
1753 loc = text.ib_block +
1754 (((length - text.ib_offset) + lbmask) >> l2b);
1755 ud_free_space(ip->i_udf->udf_vfs, text.ib_prn,
1756 loc, count);
1759 for (index = ip->i_ext_used; index < ext_used; index++) {
1760 iext = &ip->i_ext[index];
1761 if (iext->ib_flags != IB_UN_RE_AL) {
1762 count = (iext->ib_count + lbmask) >> l2b;
1763 ud_free_space(ip->i_udf->udf_vfs, iext->ib_prn,
1764 iext->ib_block, count);
1766 bzero(iext, sizeof (struct icb_ext));
1767 continue;
1771 * release any continuation blocks
1773 if (ip->i_con) {
1775 ASSERT(ip->i_con_count >= ip->i_con_used);
1778 * Find out how many indirect blocks
1779 * are required and release the rest
1781 if (ip->i_desc_type == ICB_FLAG_SHORT_AD) {
1782 elen = sizeof (struct short_ad);
1783 } else if (ip->i_desc_type == ICB_FLAG_LONG_AD) {
1784 elen = sizeof (struct long_ad);
1786 ndent = ip->i_max_emb / elen;
1787 if (ip->i_ext_used > ndent) {
1788 ecount = ip->i_ext_used - ndent;
1789 } else {
1790 ecount = 0;
1792 con_freed = 0;
1793 for (index = 0; index < ip->i_con_used; index++) {
1794 icon = &ip->i_con[index];
1795 nient = icon->ib_count -
1796 (sizeof (struct alloc_ext_desc) + elen);
1797 /* Header + 1 indirect extent */
1798 nient /= elen;
1799 if (ecount) {
1800 if (ecount > nient) {
1801 ecount -= nient;
1802 } else {
1803 ecount = 0;
1805 } else {
1806 count = ((icon->ib_count + lbmask) &
1807 ~lbmask) >> l2b;
1808 ud_free_space(ip->i_udf->udf_vfs,
1809 icon->ib_prn, icon->ib_block, count);
1810 con_freed++;
1811 ip->i_cur_max_ext -= nient;
1815 * set the continuation extents used(i_con_used)i to correct
1816 * value. It is possible for i_con_used to be zero,
1817 * if we free up all continuation extents. This happens
1818 * when ecount is 0 before entering the for loop above.
1820 ip->i_con_used -= con_freed;
1821 if (ip->i_con_read > ip->i_con_used) {
1822 ip->i_con_read = ip->i_con_used;
1827 void
1828 ud_trunc_ext4096(struct ud_inode *ip, uoff_t length)
1831 * Truncate code is the same for
1832 * both file of type 4 and 4096
1834 ud_trunc_ext4(ip, length);
1838 * Remove any inodes in the inode cache belonging to dev
1840 * There should not be any active ones, return error if any are found but
1841 * still invalidate others (N.B.: this is a user error, not a system error).
1843 * Also, count the references to dev by block devices - this really
1844 * has nothing to do with the object of the procedure, but as we have
1845 * to scan the inode table here anyway, we might as well get the
1846 * extra benefit.
1848 int32_t
1849 ud_iflush(struct vfs *vfsp)
1851 int32_t index, busy = 0;
1852 union ihead *ih;
1853 struct udf_vfs *udf_vfsp;
1854 dev_t dev;
1855 struct vnode *rvp, *vp;
1856 struct ud_inode *ip, *next;
1858 ud_printf("ud_iflush\n");
1859 udf_vfsp = (struct udf_vfs *)vfsp->vfs_data;
1860 rvp = udf_vfsp->udf_root;
1861 dev = vfsp->vfs_dev;
1863 mutex_enter(&ud_icache_lock);
1864 for (index = 0; index < UD_HASH_SZ; index++) {
1865 ih = &ud_ihead[index];
1867 next = ih->ih_chain[0];
1868 while (next != (struct ud_inode *)ih) {
1869 ip = next;
1870 next = ip->i_forw;
1871 if (ip->i_dev != dev) {
1872 continue;
1874 vp = ITOV(ip);
1876 * root inode is processed by the caller
1878 if (vp == rvp) {
1879 if (vp->v_count > 1) {
1880 busy = -1;
1882 continue;
1884 if (ip->i_flag & IREF) {
1886 * Set error indicator for return value,
1887 * but continue invalidating other
1888 * inodes.
1890 busy = -1;
1891 continue;
1894 rw_enter(&ip->i_contents, RW_WRITER);
1895 remque(ip);
1896 ip->i_forw = ip;
1897 ip->i_back = ip;
1899 * Hold the vnode since its not done
1900 * in fop_putpage anymore.
1902 VN_HOLD(vp);
1904 * XXX Synchronous write holding
1905 * cache lock
1907 (void) ud_syncip(ip, B_INVAL, I_SYNC);
1908 rw_exit(&ip->i_contents);
1909 VN_RELE(vp);
1912 mutex_exit(&ud_icache_lock);
1914 return (busy);
1919 * Check mode permission on inode. Mode is READ, WRITE or EXEC.
1920 * In the case of WRITE, the read-only status of the file system
1921 * is checked. The applicable mode bits are compared with the
1922 * requested form of access. If bits are missing, the secpolicy
1923 * function will check for privileges.
1926 ud_iaccess(struct ud_inode *ip, int32_t mode, struct cred *cr, int dolock)
1928 int shift = 0;
1929 int ret = 0;
1931 if (dolock)
1932 rw_enter(&ip->i_contents, RW_READER);
1933 ASSERT(RW_LOCK_HELD(&ip->i_contents));
1935 ud_printf("ud_iaccess\n");
1936 if (mode & IWRITE) {
1938 * Disallow write attempts on read-only
1939 * file systems, unless the file is a block
1940 * or character device or a FIFO.
1942 if (ip->i_udf->udf_flags & UDF_FL_RDONLY) {
1943 if ((ip->i_type != VCHR) &&
1944 (ip->i_type != VBLK) &&
1945 (ip->i_type != VFIFO)) {
1946 ret = EROFS;
1947 goto out;
1953 * Access check is based on only
1954 * one of owner, group, public.
1955 * If not owner, then check group.
1956 * If not a member of the group, then
1957 * check public access.
1959 if (crgetuid(cr) != ip->i_uid) {
1960 shift += 5;
1961 if (!groupmember((uid_t)ip->i_gid, cr))
1962 shift += 5;
1965 ret = secpolicy_vnode_access2(cr, ITOV(ip), ip->i_uid,
1966 UD2VA_PERM(ip->i_perm << shift), UD2VA_PERM(mode));
1968 out:
1969 if (dolock)
1970 rw_exit(&ip->i_contents);
1971 return (ret);
1974 void
1975 ud_imark(struct ud_inode *ip)
1977 timestruc_t now;
1979 gethrestime(&now);
1980 ud_printf("ud_imark\n");
1981 if (ip->i_flag & IACC) {
1982 ip->i_atime.tv_sec = now.tv_sec;
1983 ip->i_atime.tv_nsec = now.tv_nsec;
1985 if (ip->i_flag & IUPD) {
1986 ip->i_mtime.tv_sec = now.tv_sec;
1987 ip->i_mtime.tv_nsec = now.tv_nsec;
1988 ip->i_flag |= IMODTIME;
1990 if (ip->i_flag & ICHG) {
1991 ip->i_diroff = 0;
1992 ip->i_ctime.tv_sec = now.tv_sec;
1993 ip->i_ctime.tv_nsec = now.tv_nsec;
1998 void
1999 ud_itimes_nolock(struct ud_inode *ip)
2001 ud_printf("ud_itimes_nolock\n");
2003 if (ip->i_flag & (IUPD|IACC|ICHG)) {
2004 if (ip->i_flag & ICHG) {
2005 ip->i_flag |= IMOD;
2006 } else {
2007 ip->i_flag |= IMODACC;
2009 ud_imark(ip);
2010 ip->i_flag &= ~(IACC|IUPD|ICHG);
2014 void
2015 ud_delcache(struct ud_inode *ip)
2017 ud_printf("ud_delcache\n");
2019 mutex_enter(&ud_icache_lock);
2020 remque(ip);
2021 ip->i_forw = ip;
2022 ip->i_back = ip;
2023 mutex_exit(&ud_icache_lock);
2026 void
2027 ud_idrop(struct ud_inode *ip)
2029 struct vnode *vp = ITOV(ip);
2031 ASSERT(RW_WRITE_HELD(&ip->i_contents));
2033 ud_printf("ud_idrop\n");
2035 mutex_enter(&vp->v_lock);
2036 VN_RELE_LOCKED(vp);
2037 if (vp->v_count > 0) {
2038 mutex_exit(&vp->v_lock);
2039 return;
2041 mutex_exit(&vp->v_lock);
2044 * if inode is invalid or there is no page associated with
2045 * this inode, put the inode in the front of the free list
2047 mutex_enter(&ip->i_tlock);
2048 mutex_enter(&udf_ifree_lock);
2049 if (!vn_has_cached_data(vp) || ip->i_perm == 0) {
2050 ud_add_to_free_list(ip, UD_BEGIN);
2051 } else {
2053 * Otherwise, put the inode back on the end of the free list.
2055 ud_add_to_free_list(ip, UD_END);
2057 mutex_exit(&udf_ifree_lock);
2058 ip->i_flag &= IMODTIME;
2059 mutex_exit(&ip->i_tlock);
2062 void
2063 ud_add_to_free_list(struct ud_inode *ip, uint32_t at)
2065 ASSERT(ip);
2066 ASSERT(mutex_owned(&udf_ifree_lock));
2068 #ifdef DEBUG
2069 /* Search if the element is already in the list */
2070 if (udf_ifreeh != NULL) {
2071 struct ud_inode *iq;
2073 iq = udf_ifreeh;
2074 while (iq) {
2075 if (iq == ip) {
2076 cmn_err(CE_WARN, "Duplicate %p\n", (void *)ip);
2078 iq = iq->i_freef;
2081 #endif
2083 ip->i_freef = NULL;
2084 ip->i_freeb = NULL;
2085 if (udf_ifreeh == NULL) {
2087 * Nothing on the list just add it
2089 udf_ifreeh = ip;
2090 udf_ifreet = ip;
2091 } else {
2092 if (at == UD_BEGIN) {
2094 * Add at the begining of the list
2096 ip->i_freef = udf_ifreeh;
2097 udf_ifreeh->i_freeb = ip;
2098 udf_ifreeh = ip;
2099 } else {
2101 * Add at the end of the list
2103 ip->i_freeb = udf_ifreet;
2104 udf_ifreet->i_freef = ip;
2105 udf_ifreet = ip;
2110 void
2111 ud_remove_from_free_list(struct ud_inode *ip, uint32_t at)
2113 ASSERT(ip);
2114 ASSERT(mutex_owned(&udf_ifree_lock));
2116 #ifdef DEBUG
2118 struct ud_inode *iq;
2119 uint32_t found = 0;
2121 iq = udf_ifreeh;
2122 while (iq) {
2123 if (iq == ip) {
2124 found++;
2126 iq = iq->i_freef;
2128 if (found != 1) {
2129 cmn_err(CE_WARN, "ip %p is found %x times\n",
2130 (void *)ip, found);
2133 #endif
2135 if ((ip->i_freef == NULL) && (ip->i_freeb == NULL)) {
2136 if (ip != udf_ifreeh) {
2137 return;
2141 if ((at == UD_BEGIN) || (ip == udf_ifreeh)) {
2142 udf_ifreeh = ip->i_freef;
2143 if (ip->i_freef == NULL) {
2144 udf_ifreet = NULL;
2145 } else {
2146 udf_ifreeh->i_freeb = NULL;
2148 } else {
2149 ip->i_freeb->i_freef = ip->i_freef;
2150 if (ip->i_freef) {
2151 ip->i_freef->i_freeb = ip->i_freeb;
2152 } else {
2153 udf_ifreet = ip->i_freeb;
2156 ip->i_freef = NULL;
2157 ip->i_freeb = NULL;
2160 void
2161 ud_init_inodes(void)
2163 union ihead *ih = ud_ihead;
2164 int index;
2166 _NOTE(NO_COMPETING_THREADS_NOW);
2167 for (index = 0; index < UD_HASH_SZ; index++, ih++) {
2168 ih->ih_head[0] = ih;
2169 ih->ih_head[1] = ih;
2171 mutex_init(&ud_icache_lock, NULL, MUTEX_DEFAULT, NULL);
2172 mutex_init(&ud_nino_lock, NULL, MUTEX_DEFAULT, NULL);
2174 udf_ifreeh = NULL;
2175 udf_ifreet = NULL;
2176 mutex_init(&udf_ifree_lock, NULL, MUTEX_DEFAULT, NULL);
2178 mutex_init(&ud_sync_busy, NULL, MUTEX_DEFAULT, NULL);
2179 udf_vfs_instances = NULL;
2180 mutex_init(&udf_vfs_mutex, NULL, MUTEX_DEFAULT, NULL);
2182 _NOTE(COMPETING_THREADS_NOW);