4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2012 Milan Jurik. All rights reserved.
27 #include <sys/systm.h>
28 #include <sys/types.h>
29 #include <sys/vnode.h>
30 #include <sys/errno.h>
31 #include <sys/sysmacros.h>
32 #include <sys/debug.h>
36 #include <sys/cmn_err.h>
37 #include <sys/fs/ufs_inode.h>
38 #include <sys/fs/ufs_filio.h>
39 #include <sys/fs/ufs_log.h>
40 #include <sys/inttypes.h>
41 #include <sys/atomic.h>
42 #include <sys/tuneable.h>
47 extern pri_t minclsyspri
;
48 extern struct kmem_cache
*lufs_bp
;
49 extern int ufs_trans_push_quota(ufsvfs_t
*, delta_t
, struct dquot
*);
54 kmem_cache_t
*mapentry_cache
;
57 * logmap tuning constants
59 long logmap_maxnme_commit
= 2048;
60 long logmap_maxnme_async
= 4096;
61 long logmap_maxnme_sync
= 6144;
62 long logmap_maxcfrag_commit
= 4; /* Max canceled fragments per moby */
65 uint64_t ufs_crb_size
= 0; /* current size of all crb buffers */
66 uint64_t ufs_crb_max_size
= 0; /* highest crb buffer use so far */
67 size_t ufs_crb_limit
; /* max allowable size for crbs */
68 uint64_t ufs_crb_alloc_fails
= 0; /* crb allocation failures stat */
69 #define UFS_MAX_CRB_DEFAULT_DIVISOR 10 /* max 1/10 kmem_maxavail() */
70 int ufs_max_crb_divisor
= UFS_MAX_CRB_DEFAULT_DIVISOR
; /* tunable */
71 void handle_dquot(mapentry_t
*);
74 * GENERIC MAP ROUTINES
77 #define CRB_FREE(crb, me) \
78 kmem_free(crb->c_buf, crb->c_nb); \
79 atomic_add_64(&ufs_crb_size, -(uint64_t)crb->c_nb); \
80 kmem_free(crb, sizeof (crb_t)); \
83 #define CRB_RELE(me) { \
84 crb_t *crb = (me)->me_crb; \
85 if (crb && (--crb->c_refcnt == 0)) { \
91 * Check that the old delta has an argument and a push function of
92 * ufs_trans_push_quota(), then check that the old and new deltas differ.
93 * If so we clean up with handle_dquot() before replacing the old delta.
95 #define HANDLE_DQUOT(me, melist) { \
97 (me->me_func == ufs_trans_push_quota)) { \
98 if (!((me->me_dt == melist->me_dt) && \
99 (me->me_arg == melist->me_arg) && \
100 (me->me_func == melist->me_func))) { \
107 * free up all the mapentries for a map
110 map_free_entries(mt_map_t
*mtm
)
115 while ((me
= mtm
->mtm_next
) != (mapentry_t
*)mtm
) {
116 me
->me_next
->me_prev
= me
->me_prev
;
117 me
->me_prev
->me_next
= me
->me_next
;
119 kmem_cache_free(mapentry_cache
, me
);
121 for (i
= 0; i
< mtm
->mtm_nhash
; i
++)
122 mtm
->mtm_hash
[i
] = NULL
;
128 * done with map; free if necessary
131 map_put(mt_map_t
*mtm
)
134 * free up the map's memory
136 map_free_entries(mtm
);
137 ASSERT(map_put_debug(mtm
));
138 kmem_free(mtm
->mtm_hash
,
139 (size_t) (sizeof (mapentry_t
*) * mtm
->mtm_nhash
));
140 mutex_destroy(&mtm
->mtm_mutex
);
141 mutex_destroy(&mtm
->mtm_scan_mutex
);
142 cv_destroy(&mtm
->mtm_to_roll_cv
);
143 cv_destroy(&mtm
->mtm_from_roll_cv
);
144 rw_destroy(&mtm
->mtm_rwlock
);
145 mutex_destroy(&mtm
->mtm_lock
);
146 cv_destroy(&mtm
->mtm_cv_commit
);
147 cv_destroy(&mtm
->mtm_cv_next
);
148 cv_destroy(&mtm
->mtm_cv_eot
);
149 cv_destroy(&mtm
->mtm_cv
);
150 kmem_free(mtm
, sizeof (mt_map_t
));
157 map_get(ml_unit_t
*ul
, enum maptypes maptype
, int nh
)
162 * assume the map is not here and allocate the necessary structs
164 mtm
= kmem_zalloc(sizeof (mt_map_t
), KM_SLEEP
);
165 mutex_init(&mtm
->mtm_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
166 mutex_init(&mtm
->mtm_scan_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
167 cv_init(&mtm
->mtm_to_roll_cv
, NULL
, CV_DEFAULT
, NULL
);
168 cv_init(&mtm
->mtm_from_roll_cv
, NULL
, CV_DEFAULT
, NULL
);
169 rw_init(&mtm
->mtm_rwlock
, NULL
, RW_DEFAULT
, NULL
);
170 mtm
->mtm_next
= (mapentry_t
*)mtm
;
171 mtm
->mtm_prev
= (mapentry_t
*)mtm
;
172 mtm
->mtm_hash
= kmem_zalloc((size_t) (sizeof (mapentry_t
*) * nh
),
175 mtm
->mtm_debug
= ul
->un_debug
;
176 mtm
->mtm_type
= maptype
;
179 mtm
->mtm_cfragmax
= logmap_maxcfrag_commit
;
189 mutex_init(&mtm
->mtm_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
190 cv_init(&mtm
->mtm_cv_commit
, NULL
, CV_DEFAULT
, NULL
);
191 cv_init(&mtm
->mtm_cv_next
, NULL
, CV_DEFAULT
, NULL
);
192 cv_init(&mtm
->mtm_cv_eot
, NULL
, CV_DEFAULT
, NULL
);
193 cv_init(&mtm
->mtm_cv
, NULL
, CV_DEFAULT
, NULL
);
194 ASSERT(map_get_debug(ul
, mtm
));
203 * deltamap tuning constants
205 long deltamap_maxnme
= 1024; /* global so it can be set */
208 deltamap_need_commit(mt_map_t
*mtm
)
210 return (mtm
->mtm_nme
> deltamap_maxnme
);
214 * put a delta into a deltamap; may sleep on memory
230 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
231 map_check_linkage(mtm
));
233 mutex_enter(&mtm
->mtm_mutex
);
235 for (hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
236 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
240 * Search for dup entry. We need to ensure that we don't
241 * replace a map entry which carries quota information
242 * with a map entry which doesn't. In that case we lose
243 * reference the the dquot structure which will not be
244 * cleaned up by the push function me->me_func as this will
246 * The stray dquot would be found later by invalidatedq()
247 * causing a panic when the filesystem is unmounted.
249 mep
= MAP_HASH(mof
, mtm
);
250 for (me
= *mep
; me
; me
= me
->me_hash
) {
251 if (DATAwithinME(mof
, hnb
, me
)) {
253 * Don't remove quota entries which have
254 * incremented the ref count (those with a
255 * ufs_trans_push_quota push function).
256 * Let logmap_add[_buf] clean them up.
258 if (me
->me_func
== ufs_trans_push_quota
) {
263 ASSERT((dtyp
== DT_CANCEL
) ||
264 (!DATAoverlapME(mof
, hnb
, me
)) ||
265 MEwithinDATA(me
, mof
, hnb
));
274 * Add up all the delta map deltas so we can compute
275 * an upper bound on the log size used.
276 * Note, some deltas get removed from the deltamap
277 * before the deltamap_push by lufs_write_strategy
278 * and so multiple deltas to the same mof offset
279 * don't get cancelled here but in the logmap.
280 * Thus we can't easily get a accurate count of
281 * the log space used - only an upper bound.
283 if (tp
&& (mtm
->mtm_ul
->un_deltamap
== mtm
)) {
284 ASSERT(dtyp
!= DT_CANCEL
);
285 if (dtyp
== DT_ABZERO
) {
286 tp
->deltas_size
+= sizeof (struct delta
);
289 (hnb
+ sizeof (struct delta
));
297 * May need to drop & re-grab the mtm_mutex
298 * and then recheck for a duplicate
300 me
= kmem_cache_alloc(mapentry_cache
, KM_NOSLEEP
);
302 mutex_exit(&mtm
->mtm_mutex
);
303 me
= kmem_cache_alloc(mapentry_cache
, KM_SLEEP
);
304 mutex_enter(&mtm
->mtm_mutex
);
306 bzero(me
, sizeof (mapentry_t
));
309 * initialize and put in deltamap
316 me
->me_flags
= ME_HASH
;
317 me
->me_tid
= mtm
->mtm_tid
;
321 me
->me_next
= (mapentry_t
*)mtm
;
322 me
->me_prev
= mtm
->mtm_prev
;
323 mtm
->mtm_prev
->me_next
= me
;
327 mutex_exit(&mtm
->mtm_mutex
);
329 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
330 map_check_linkage(mtm
));
334 * remove deltas within (mof, nb) and return as linked list
337 deltamap_remove(mt_map_t
*mtm
, offset_t mof
, off_t nb
)
347 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
348 map_check_linkage(mtm
));
350 mutex_enter(&mtm
->mtm_mutex
);
351 for (mer
= NULL
, hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
352 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
356 * remove entries from hash and return as a aged linked list
358 mep
= MAP_HASH(mof
, mtm
);
359 while ((me
= *mep
) != 0) {
360 if (MEwithinDATA(me
, mof
, hnb
)) {
362 me
->me_next
->me_prev
= me
->me_prev
;
363 me
->me_prev
->me_next
= me
->me_next
;
366 me
->me_flags
|= ME_LIST
;
367 me
->me_flags
&= ~ME_HASH
;
373 mutex_exit(&mtm
->mtm_mutex
);
375 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
376 map_check_linkage(mtm
));
382 * delete entries within (mof, nb)
385 deltamap_del(mt_map_t
*mtm
, offset_t mof
, off_t nb
)
390 menext
= deltamap_remove(mtm
, mof
, nb
);
391 while ((me
= menext
) != 0) {
392 menext
= me
->me_hash
;
393 kmem_cache_free(mapentry_cache
, me
);
398 * Call the indicated function to cause deltas to move to the logmap.
399 * top_end_sync() is the only caller of this function and
400 * it has waited for the completion of all threads, so there can
401 * be no other activity in the deltamap. Therefore we don't need to
402 * hold the deltamap lock.
405 deltamap_push(ml_unit_t
*ul
)
413 mt_map_t
*mtm
= ul
->un_deltamap
;
415 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
416 map_check_linkage(mtm
));
419 * for every entry in the deltamap
421 while ((me
= mtm
->mtm_next
) != (mapentry_t
*)mtm
) {
428 if ((ul
->un_flags
& LDL_ERROR
) ||
429 (*func
)(ul
->un_ufsvfs
, dtyp
, arg
))
430 deltamap_del(mtm
, mof
, nb
);
433 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
434 map_check_linkage(mtm
));
442 logmap_need_commit(mt_map_t
*mtm
)
444 return ((mtm
->mtm_nmet
> logmap_maxnme_commit
) ||
445 (mtm
->mtm_cfrags
>= mtm
->mtm_cfragmax
));
449 logmap_need_roll_async(mt_map_t
*mtm
)
451 return (mtm
->mtm_nme
> logmap_maxnme_async
);
455 logmap_need_roll_sync(mt_map_t
*mtm
)
457 return (mtm
->mtm_nme
> logmap_maxnme_sync
);
461 logmap_start_roll(ml_unit_t
*ul
)
463 mt_map_t
*logmap
= ul
->un_logmap
;
465 logmap_settail(logmap
, ul
);
466 ASSERT(!(ul
->un_flags
& LDL_NOROLL
));
467 mutex_enter(&logmap
->mtm_mutex
);
468 if ((logmap
->mtm_flags
& MTM_ROLL_RUNNING
) == 0) {
469 logmap
->mtm_flags
|= MTM_ROLL_RUNNING
;
470 logmap
->mtm_flags
&= ~(MTM_FORCE_ROLL
| MTM_ROLL_EXIT
);
471 (void) thread_create(NULL
, 0, trans_roll
, ul
, 0, &p0
,
472 TS_RUN
, minclsyspri
);
474 mutex_exit(&logmap
->mtm_mutex
);
478 logmap_kill_roll(ml_unit_t
*ul
)
480 mt_map_t
*mtm
= ul
->un_logmap
;
485 mutex_enter(&mtm
->mtm_mutex
);
487 while (mtm
->mtm_flags
& MTM_ROLL_RUNNING
) {
488 mtm
->mtm_flags
|= MTM_ROLL_EXIT
;
489 cv_signal(&mtm
->mtm_to_roll_cv
);
490 cv_wait(&mtm
->mtm_from_roll_cv
, &mtm
->mtm_mutex
);
492 mutex_exit(&mtm
->mtm_mutex
);
496 * kick the roll thread if it's not doing anything
499 logmap_forceroll_nowait(mt_map_t
*logmap
)
502 * Don't need to lock mtm_mutex to read mtm_flags here as we
503 * don't care in the rare case when we get a transitional value
504 * of mtm_flags. Just by signalling the thread it will wakeup
505 * and notice it has too many logmap entries.
507 ASSERT(!(logmap
->mtm_ul
->un_flags
& LDL_NOROLL
));
508 if ((logmap
->mtm_flags
& MTM_ROLLING
) == 0) {
509 cv_signal(&logmap
->mtm_to_roll_cv
);
514 * kick the roll thread and wait for it to finish a cycle
517 logmap_forceroll(mt_map_t
*mtm
)
519 mutex_enter(&mtm
->mtm_mutex
);
520 if ((mtm
->mtm_flags
& MTM_FORCE_ROLL
) == 0) {
521 mtm
->mtm_flags
|= MTM_FORCE_ROLL
;
522 cv_signal(&mtm
->mtm_to_roll_cv
);
525 if ((mtm
->mtm_flags
& MTM_ROLL_RUNNING
) == 0) {
526 mtm
->mtm_flags
&= ~MTM_FORCE_ROLL
;
529 cv_wait(&mtm
->mtm_from_roll_cv
, &mtm
->mtm_mutex
);
530 } while (mtm
->mtm_flags
& MTM_FORCE_ROLL
);
532 mutex_exit(&mtm
->mtm_mutex
);
536 * remove rolled deltas within (mof, nb) and free them
539 logmap_remove_roll(mt_map_t
*mtm
, offset_t mof
, off_t nb
)
545 offset_t savmof
= mof
;
548 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
549 map_check_linkage(mtm
));
553 rw_enter(&mtm
->mtm_rwlock
, RW_WRITER
);
554 mutex_enter(&mtm
->mtm_mutex
);
555 for (hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
556 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
560 * remove and free the rolled entries
562 mep
= MAP_HASH(mof
, mtm
);
563 while ((me
= *mep
) != 0) {
564 if ((me
->me_flags
& ME_ROLL
) &&
565 (MEwithinDATA(me
, mof
, hnb
))) {
566 if (me
->me_flags
& ME_AGE
) {
569 mutex_exit(&mtm
->mtm_mutex
);
575 me
->me_next
->me_prev
= me
->me_prev
;
576 me
->me_prev
->me_next
= me
->me_next
;
577 me
->me_flags
&= ~(ME_HASH
|ME_ROLL
);
578 ASSERT(!(me
->me_flags
& ME_USER
));
581 * cancelled entries are handled by someone else
583 if ((me
->me_flags
& ME_CANCEL
) == 0) {
584 roll_stats
[me
->me_dt
]++;
586 kmem_cache_free(mapentry_cache
, me
);
592 mutex_exit(&mtm
->mtm_mutex
);
594 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
595 map_check_linkage(mtm
));
598 rw_exit(&mtm
->mtm_rwlock
);
602 * Find the disk offset of the next delta to roll.
603 * Returns 0: no more deltas to roll or a transaction is being committed
604 * 1: a delta to roll has been found and *mofp points
605 * to the master file disk offset
608 logmap_next_roll(mt_map_t
*logmap
, offset_t
*mofp
)
612 ASSERT(((logmap
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
613 map_check_linkage(logmap
));
615 mutex_enter(&logmap
->mtm_mutex
);
616 for (me
= logmap
->mtm_next
; me
!= (mapentry_t
*)logmap
;
619 if (me
->me_flags
& ME_ROLL
) {
623 /* part of currently busy transaction; stop */
624 if (me
->me_tid
== logmap
->mtm_tid
) {
628 /* part of commit-in-progress transaction; stop */
629 if (me
->me_tid
== logmap
->mtm_committid
) {
634 * We shouldn't see a DT_CANCEL mapentry whose
635 * tid != mtm_committid, or != mtm_tid since
636 * these are removed at the end of each committed
639 ASSERT(!(me
->me_dt
== DT_CANCEL
));
642 mutex_exit(&logmap
->mtm_mutex
);
645 mutex_exit(&logmap
->mtm_mutex
);
650 * put mapentry on sorted age list
653 logmap_list_age(mapentry_t
**age
, mapentry_t
*meadd
)
657 ASSERT(!(meadd
->me_flags
& (ME_AGE
|ME_LIST
)));
659 for (me
= *age
; me
; age
= &me
->me_agenext
, me
= *age
) {
660 if (me
->me_age
> meadd
->me_age
)
663 meadd
->me_agenext
= me
;
664 meadd
->me_flags
|= ME_AGE
;
669 * get a list of deltas within <mof, mof+nb>
670 * returns with mtm_rwlock held
671 * return value says whether the entire mof range is covered by deltas
683 int rwtype
= RW_READER
;
684 offset_t savmof
= mof
;
692 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
693 map_check_linkage(mtm
));
695 rw_enter(&mtm
->mtm_rwlock
, rwtype
);
697 mutex_enter(&mtm
->mtm_mutex
);
698 for (hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
699 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
703 * find overlapping entries
705 mep
= MAP_HASH(mof
, mtm
);
706 for (me
= *mep
; me
; me
= me
->me_hash
) {
707 if (me
->me_dt
== DT_CANCEL
)
709 if (!DATAoverlapME(mof
, hnb
, me
))
712 * check if map entry is in use
713 * (about to be rolled).
715 if (me
->me_flags
& ME_AGE
) {
717 * reset the age bit in the list,
718 * upgrade the lock, and try again
720 for (me
= *age
; me
; me
= *age
) {
721 *age
= me
->me_agenext
;
722 me
->me_flags
&= ~ME_AGE
;
724 mutex_exit(&mtm
->mtm_mutex
);
725 rw_exit(&mtm
->mtm_rwlock
);
732 /* add mapentry to age ordered list */
733 logmap_list_age(age
, me
);
736 if (DATAwithinCRB(savmof
, savnb
, crb
)) {
740 if (DATAwithinME(savmof
, savnb
, me
)) {
747 mutex_exit(&mtm
->mtm_mutex
);
749 ASSERT(RW_LOCK_HELD(&mtm
->mtm_rwlock
));
754 * Get a list of deltas for rolling - returns sucess or failure.
755 * Also return the cached roll buffer if all deltas point to it.
758 logmap_list_get_roll(mt_map_t
*logmap
, offset_t mof
, rollbuf_t
*rbp
)
760 mapentry_t
*me
, **mep
, *age
= NULL
;
763 ASSERT(RW_LOCK_HELD(&logmap
->mtm_rwlock
));
764 ASSERT(((logmap
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
765 map_check_linkage(logmap
));
766 ASSERT((mof
& MAPBLOCKOFF
) == 0);
771 * find overlapping entries
773 mutex_enter(&logmap
->mtm_mutex
);
774 mep
= MAP_HASH(mof
, logmap
);
775 for (me
= *mep
; me
; me
= me
->me_hash
) {
776 if (!DATAoverlapME(mof
, MAPBLOCKSIZE
, me
))
778 if (me
->me_tid
== logmap
->mtm_tid
)
780 if (me
->me_tid
== logmap
->mtm_committid
)
782 if (me
->me_dt
== DT_CANCEL
)
786 * Check if map entry is in use (by lufs_read_strategy())
787 * and if so reset the age bit in the list,
788 * upgrade the lock, and try again
790 if (me
->me_flags
& ME_AGE
) {
791 for (me
= age
; me
; me
= age
) {
792 age
= me
->me_agenext
;
793 me
->me_flags
&= ~ME_AGE
;
795 mutex_exit(&logmap
->mtm_mutex
);
796 return (1); /* failure */
798 /* add mapentry to age ordered list */
799 logmap_list_age(&age
, me
);
807 * Mark the deltas as being rolled.
809 for (me
= age
; me
; me
= me
->me_agenext
) {
810 me
->me_flags
|= ME_ROLL
;
814 * Test if all deltas are covered by one valid roll buffer
817 if (crb
&& !(crb
->c_invalid
)) {
818 for (me
= age
; me
; me
= me
->me_agenext
) {
819 if (me
->me_crb
!= crb
) {
829 mutex_exit(&logmap
->mtm_mutex
);
831 ASSERT(((logmap
->mtm_debug
& MT_SCAN
) == 0) ||
832 logmap_logscan_debug(logmap
, age
));
833 ASSERT(RW_LOCK_HELD(&logmap
->mtm_rwlock
));
834 return (0); /* success */
838 logmap_list_put_roll(mt_map_t
*mtm
, mapentry_t
*age
)
842 ASSERT(RW_LOCK_HELD(&mtm
->mtm_rwlock
));
843 mutex_enter(&mtm
->mtm_mutex
);
844 for (me
= age
; me
; me
= age
) {
845 age
= me
->me_agenext
;
846 me
->me_flags
&= ~ME_AGE
;
848 mutex_exit(&mtm
->mtm_mutex
);
852 logmap_list_put(mt_map_t
*mtm
, mapentry_t
*age
)
856 ASSERT(RW_LOCK_HELD(&mtm
->mtm_rwlock
));
857 mutex_enter(&mtm
->mtm_mutex
);
858 for (me
= age
; me
; me
= age
) {
859 age
= me
->me_agenext
;
860 me
->me_flags
&= ~ME_AGE
;
862 mutex_exit(&mtm
->mtm_mutex
);
863 rw_exit(&mtm
->mtm_rwlock
);
866 #define UFS_RW_BALANCE 2
867 int ufs_rw_balance
= UFS_RW_BALANCE
;
870 * Check if we need to read the master.
871 * The master does not need to be read if the log deltas to the
872 * block are for one contiguous set of full disk sectors.
873 * Both cylinder group bit maps DT_CG (8K); directory entries (512B);
874 * and possibly others should not require master disk reads.
875 * Calculate the sector map for writing later.
878 logmap_setup_read(mapentry_t
*age
, rollbuf_t
*rbp
)
885 int start_sec
, end_sec
;
888 int first_sec
= INT_MAX
;
890 rbsecmap_t secmap
= 0;
892 /* LINTED: warning: logical expression always true: op "||" */
893 ASSERT((MAPBLOCKSIZE
/ DEV_BSIZE
) == (sizeof (secmap
) * NBBY
));
895 for (me
= age
; me
; me
= me
->me_agenext
) {
906 * If the delta is not sector aligned then
907 * read the whole block.
909 if ((nb
& DEV_BMASK
) || (mof
& DEV_BMASK
)) {
913 /* Set sector map used in the MAPBLOCKSIZE block. */
914 start_sec
= (mof
& MAPBLOCKOFF
) >> DEV_BSHIFT
;
915 end_sec
= start_sec
+ ((nb
- 1) >> DEV_BSHIFT
);
916 for (i
= start_sec
; i
<= end_sec
; i
++) {
917 secmap
|= UINT16_C(1) << i
;
920 if (me
->me_dt
!= DT_INODE
) {
923 if (start_sec
< first_sec
) {
924 first_sec
= start_sec
;
926 if (end_sec
> last_sec
) {
932 ASSERT(first_sec
!= INT_MAX
);
933 ASSERT(last_sec
!= -1);
937 * Here we have a tradeoff choice. It must be better to
938 * do 2 writes * in the same MAPBLOCKSIZE chunk, than a
939 * read and a write. But what about 3 or more writes, versus
940 * a read+write? * Where is the cut over? It will depend on
941 * the track caching, scsi driver and other activity.
942 * A unpublished tunable is defined (ufs_rw_balance) that
943 * currently defaults to 2.
946 int count
= 0, gap
= 0;
947 int sector_set
; /* write needed to this sector */
949 /* Count the gaps (every 1 to 0 transation) */
950 for (i
= first_sec
+ 1; i
< last_sec
; i
++) {
951 sector_set
= secmap
& (UINT16_C(1) << i
);
952 if (!gap
&& !sector_set
) {
955 if (count
> ufs_rw_balance
) {
959 } else if (gap
&& sector_set
) {
966 * Inodes commonly make up the majority (~85%) of deltas.
967 * They cannot contain embedded user data, so its safe to
968 * read and write them all in one IO.
969 * But for directory entries, shadow inode data, and
970 * quota record data the user data fragments can be embedded
971 * betwen those metadata, and so its not safe to read, modify
972 * then write the entire range as user asynchronous user data
973 * writes could get overwritten with old data.
974 * Thus we have to create a segment map of meta data that
975 * needs to get written.
977 * If user data was logged then this issue would go away.
980 for (i
= first_sec
+ 1; i
< last_sec
; i
++) {
981 secmap
|= (UINT16_C(1) << i
);
985 rbp
->rb_secmap
= secmap
;
986 return (read_needed
);
990 * Abort the load of a set of log map delta's.
992 * Clear out all mapentries on this unit's log map
993 * which have a tid (transaction id) equal to the
994 * parameter tid. Walk the cancel list, taking everything
998 logmap_abort(ml_unit_t
*ul
, uint32_t tid
)
1000 struct mt_map
*mtm
= ul
->un_logmap
; /* Log map */
1001 mapentry_t
*me
, **mep
;
1004 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1005 map_check_linkage(mtm
));
1008 * wait for any outstanding reads to finish; lock out future reads
1010 rw_enter(&mtm
->mtm_rwlock
, RW_WRITER
);
1012 mutex_enter(&mtm
->mtm_mutex
);
1013 /* Take everything off cancel list */
1014 while ((me
= mtm
->mtm_cancel
) != NULL
) {
1015 mtm
->mtm_cancel
= me
->me_cancel
;
1016 me
->me_flags
&= ~ME_CANCEL
;
1017 me
->me_cancel
= NULL
;
1021 * Now take out all mapentries with current tid, and committid
1022 * as this function is called from logmap_logscan and logmap_commit
1023 * When it is called from logmap_logscan mtm_tid == mtm_committid
1024 * But when logmap_abort is called from logmap_commit it is
1025 * because the log errored when trying to write the commit record,
1026 * after the async ops have been allowed to start in top_end_sync.
1027 * So we also need to remove all mapentries from the transaction whose
1030 for (i
= 0; i
< mtm
->mtm_nhash
; i
++) {
1031 mep
= &mtm
->mtm_hash
[i
];
1032 while ((me
= *mep
) != NULL
) {
1033 if (me
->me_tid
== tid
||
1034 me
->me_tid
== mtm
->mtm_committid
) {
1036 me
->me_next
->me_prev
= me
->me_prev
;
1037 me
->me_prev
->me_next
= me
->me_next
;
1038 if (!(me
->me_flags
& ME_USER
)) {
1042 kmem_cache_free(mapentry_cache
, me
);
1049 if (!(ul
->un_flags
& LDL_SCAN
))
1050 mtm
->mtm_flags
|= MTM_CANCELED
;
1051 mutex_exit(&mtm
->mtm_mutex
);
1054 rw_exit(&mtm
->mtm_rwlock
);
1056 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1057 map_check_linkage(mtm
));
1061 logmap_wait_space(mt_map_t
*mtm
, ml_unit_t
*ul
, mapentry_t
*me
)
1063 ASSERT(MUTEX_HELD(&ul
->un_log_mutex
));
1065 while (!ldl_has_space(ul
, me
)) {
1066 ASSERT(!(ul
->un_flags
& LDL_NOROLL
));
1067 mutex_exit(&ul
->un_log_mutex
);
1068 logmap_forceroll(mtm
);
1069 mutex_enter(&ul
->un_log_mutex
);
1070 if (ul
->un_flags
& LDL_ERROR
)
1074 ASSERT(MUTEX_HELD(&ul
->un_log_mutex
));
1078 * put a list of deltas into a logmap
1079 * If va == NULL, don't write to the log.
1084 char *va
, /* Ptr to buf w/deltas & data */
1085 offset_t vamof
, /* Offset on master of buf start */
1086 mapentry_t
*melist
) /* Entries to add */
1092 mapentry_t
**savmep
;
1094 mt_map_t
*mtm
= ul
->un_logmap
;
1096 mutex_enter(&ul
->un_log_mutex
);
1098 logmap_wait_space(mtm
, ul
, melist
);
1100 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1101 map_check_linkage(mtm
));
1107 mof
= melist
->me_mof
;
1111 * search for overlaping entries
1113 savmep
= mep
= MAP_HASH(mof
, mtm
);
1114 mutex_enter(&mtm
->mtm_mutex
);
1115 while ((me
= *mep
) != 0) {
1117 * Data consumes old map entry; cancel map entry.
1118 * Take care when we replace an old map entry
1119 * which carries quota information with a newer entry
1120 * which does not. In that case the push function
1121 * would not be called to clean up the dquot structure.
1122 * This would be found later by invalidatedq() causing
1123 * a panic when the filesystem in unmounted.
1124 * We clean up the dquot manually and then replace
1127 if (MEwithinDATA(me
, mof
, nb
) &&
1128 ((me
->me_flags
& (ME_ROLL
|ME_CANCEL
)) == 0)) {
1129 if (tid
== me
->me_tid
&&
1130 ((me
->me_flags
& ME_AGE
) == 0)) {
1132 me
->me_next
->me_prev
= me
->me_prev
;
1133 me
->me_prev
->me_next
= me
->me_next
;
1134 ASSERT(!(me
->me_flags
& ME_USER
));
1137 * Special case if the mapentry
1138 * carries a dquot and a push function.
1139 * We have to clean up the quota info
1140 * before replacing the mapentry.
1142 if (me
->me_dt
== DT_QR
)
1143 HANDLE_DQUOT(me
, melist
);
1145 kmem_cache_free(mapentry_cache
, me
);
1148 me
->me_cancel
= mtm
->mtm_cancel
;
1149 mtm
->mtm_cancel
= me
;
1150 me
->me_flags
|= ME_CANCEL
;
1152 mep
= &(*mep
)->me_hash
;
1154 mutex_exit(&mtm
->mtm_mutex
);
1160 melist
= melist
->me_hash
;
1161 me
->me_flags
&= ~ME_LIST
;
1163 * If va != NULL, put in the log.
1166 ldl_write(ul
, va
, vamof
, me
);
1167 if (ul
->un_flags
& LDL_ERROR
) {
1168 kmem_cache_free(mapentry_cache
, me
);
1171 ASSERT((va
== NULL
) ||
1172 ((mtm
->mtm_debug
& MT_LOG_WRITE_CHECK
) == 0) ||
1173 map_check_ldl_write(ul
, va
, vamof
, me
));
1178 mutex_enter(&mtm
->mtm_mutex
);
1179 me
->me_hash
= *savmep
;
1181 me
->me_next
= (mapentry_t
*)mtm
;
1182 me
->me_prev
= mtm
->mtm_prev
;
1183 mtm
->mtm_prev
->me_next
= me
;
1185 me
->me_flags
|= ME_HASH
;
1187 me
->me_age
= mtm
->mtm_age
++;
1190 mutex_exit(&mtm
->mtm_mutex
);
1193 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1194 map_check_linkage(mtm
));
1195 mutex_exit(&ul
->un_log_mutex
);
1199 * Add the delta(s) into the log.
1200 * Create one cached roll buffer logmap entry, and reference count the
1201 * number of mapentries refering to it.
1202 * Cancel previous logmap entries.
1203 * logmap_add is tolerant of failure to allocate a cached roll buffer.
1208 char *va
, /* Ptr to buf w/deltas & data */
1209 offset_t bufmof
, /* Offset on master of buf start */
1210 mapentry_t
*melist
, /* Entries to add */
1211 caddr_t buf
, /* Buffer containing delta(s) */
1212 uint32_t bufsz
) /* Size of buf */
1215 offset_t vamof
= bufmof
+ (va
- buf
);
1219 mapentry_t
**savmep
;
1221 mt_map_t
*mtm
= ul
->un_logmap
;
1223 crb_t
*crbsav
= NULL
;
1225 ASSERT((bufsz
& DEV_BMASK
) == 0);
1226 mutex_enter(&ul
->un_log_mutex
);
1227 logmap_wait_space(mtm
, ul
, melist
);
1229 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1230 map_check_linkage(mtm
));
1236 mof
= melist
->me_mof
;
1240 * search for overlapping entries
1242 savmep
= mep
= MAP_HASH(mof
, mtm
);
1243 mutex_enter(&mtm
->mtm_mutex
);
1244 while ((me
= *mep
) != 0) {
1246 * Data consumes old map entry; cancel map entry.
1247 * Take care when we replace an old map entry
1248 * which carries quota information with a newer entry
1249 * which does not. In that case the push function
1250 * would not be called to clean up the dquot structure.
1251 * This would be found later by invalidatedq() causing
1252 * a panic when the filesystem in unmounted.
1253 * We clean up the dquot manually and then replace
1257 if (MEwithinDATA(me
, mof
, nb
) &&
1258 ((me
->me_flags
& (ME_ROLL
|ME_CANCEL
)) == 0)) {
1259 if (tid
== me
->me_tid
&&
1260 ((me
->me_flags
& ME_AGE
) == 0)) {
1262 me
->me_next
->me_prev
= me
->me_prev
;
1263 me
->me_prev
->me_next
= me
->me_next
;
1264 ASSERT(!(me
->me_flags
& ME_USER
));
1267 * Special case if the mapentry
1268 * carries a dquot and a push function.
1269 * We have to clean up the quota info
1270 * before replacing the mapentry.
1272 if (me
->me_dt
== DT_QR
)
1273 HANDLE_DQUOT(me
, melist
);
1276 * If this soon to be deleted mapentry
1277 * has a suitable roll buffer then
1280 if (crb
&& (--crb
->c_refcnt
== 0)) {
1282 (crb
->c_nb
!= bufsz
)) {
1285 bcopy(buf
, crb
->c_buf
,
1288 crb
->c_mof
= bufmof
;
1293 kmem_cache_free(mapentry_cache
, me
);
1296 me
->me_cancel
= mtm
->mtm_cancel
;
1297 mtm
->mtm_cancel
= me
;
1298 me
->me_flags
|= ME_CANCEL
;
1302 * Inode deltas within the same fs block come
1303 * in individually as separate calls to logmap_add().
1304 * All others come in as one call. So check for an
1305 * existing entry where we can re-use the crb.
1307 if ((me
->me_dt
== DT_INODE
) && (tid
== me
->me_tid
) &&
1309 WITHIN(mof
, nb
, crb
->c_mof
, crb
->c_nb
)) {
1310 ASSERT(crb
->c_mof
== bufmof
);
1311 ASSERT(crb
->c_nb
== bufsz
);
1312 bcopy(buf
, crb
->c_buf
, bufsz
);
1315 mep
= &(*mep
)->me_hash
;
1317 mutex_exit(&mtm
->mtm_mutex
);
1320 * If we don't already have a crb then allocate one
1321 * and copy the incoming buffer. Only do this once
1322 * for all the incoming deltas.
1324 if ((crbsav
== NULL
) && (melist
->me_dt
!= DT_ABZERO
)) {
1326 * Only use a cached roll buffer if we
1327 * have enough memory, and check for failures.
1329 if (((ufs_crb_size
+ bufsz
) < ufs_crb_limit
) &&
1330 (kmem_avail() > bufsz
)) {
1331 crbsav
= kmem_alloc(sizeof (crb_t
), KM_NOSLEEP
);
1333 ufs_crb_alloc_fails
++;
1336 crbsav
->c_buf
= kmem_alloc(bufsz
, KM_NOSLEEP
);
1337 if (crbsav
->c_buf
) {
1338 atomic_add_64(&ufs_crb_size
,
1340 if (ufs_crb_size
> ufs_crb_max_size
) {
1341 ufs_crb_max_size
= ufs_crb_size
;
1343 bcopy(buf
, crbsav
->c_buf
, bufsz
);
1344 crbsav
->c_nb
= bufsz
;
1345 crbsav
->c_refcnt
= 0;
1346 crbsav
->c_invalid
= 0;
1347 ASSERT((bufmof
& DEV_BMASK
) == 0);
1348 crbsav
->c_mof
= bufmof
;
1350 kmem_free(crbsav
, sizeof (crb_t
));
1360 melist
= melist
->me_hash
;
1361 me
->me_flags
&= ~ME_LIST
;
1362 me
->me_crb
= crbsav
;
1369 ldl_write(ul
, va
, vamof
, me
); /* add to on-disk log */
1370 if (ul
->un_flags
& LDL_ERROR
) {
1372 kmem_cache_free(mapentry_cache
, me
);
1375 ASSERT(((mtm
->mtm_debug
& MT_LOG_WRITE_CHECK
) == 0) ||
1376 map_check_ldl_write(ul
, va
, vamof
, me
));
1381 mutex_enter(&mtm
->mtm_mutex
);
1382 me
->me_hash
= *savmep
;
1384 me
->me_next
= (mapentry_t
*)mtm
;
1385 me
->me_prev
= mtm
->mtm_prev
;
1386 mtm
->mtm_prev
->me_next
= me
;
1388 me
->me_flags
|= ME_HASH
;
1390 me
->me_age
= mtm
->mtm_age
++;
1393 mutex_exit(&mtm
->mtm_mutex
);
1396 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1397 map_check_linkage(mtm
));
1398 mutex_exit(&ul
->un_log_mutex
);
1402 * free up any cancelled deltas
1405 logmap_free_cancel(mt_map_t
*mtm
, mapentry_t
**cancelhead
)
1411 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1412 map_check_linkage(mtm
));
1416 rw_enter(&mtm
->mtm_rwlock
, RW_WRITER
);
1419 * At EOT, cancel the indicated deltas
1421 mutex_enter(&mtm
->mtm_mutex
);
1422 if (mtm
->mtm_flags
& MTM_CANCELED
) {
1423 mtm
->mtm_flags
&= ~MTM_CANCELED
;
1424 ASSERT(dolock
== 0);
1425 mutex_exit(&mtm
->mtm_mutex
);
1429 while ((me
= *cancelhead
) != NULL
) {
1431 * roll forward or read collision; wait and try again
1433 if (me
->me_flags
& ME_AGE
) {
1434 ASSERT(dolock
== 0);
1435 mutex_exit(&mtm
->mtm_mutex
);
1440 * remove from cancel list
1442 *cancelhead
= me
->me_cancel
;
1443 me
->me_cancel
= NULL
;
1444 me
->me_flags
&= ~(ME_CANCEL
);
1447 * logmap_remove_roll handles ME_ROLL entries later
1448 * we leave them around for logmap_iscancel
1449 * XXX is this necessary?
1451 if (me
->me_flags
& ME_ROLL
)
1455 * remove from hash (if necessary)
1457 if (me
->me_flags
& ME_HASH
) {
1458 mep
= MAP_HASH(me
->me_mof
, mtm
);
1462 me
->me_next
->me_prev
= me
->me_prev
;
1463 me
->me_prev
->me_next
= me
->me_next
;
1464 me
->me_flags
&= ~(ME_HASH
);
1465 if (!(me
->me_flags
& ME_USER
)) {
1470 mep
= &(*mep
)->me_hash
;
1474 * put the entry on the free list
1477 kmem_cache_free(mapentry_cache
, me
);
1479 mutex_exit(&mtm
->mtm_mutex
);
1481 rw_exit(&mtm
->mtm_rwlock
);
1483 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1484 map_check_linkage(mtm
));
1489 logmap_commit(ml_unit_t
*ul
, uint32_t tid
)
1492 mt_map_t
*mtm
= ul
->un_logmap
;
1495 ASSERT(MUTEX_HELD(&ul
->un_log_mutex
));
1498 * async'ly write a commit rec into the log
1500 if (mtm
->mtm_dirty
) {
1502 * put commit record into log
1504 me
.me_mof
= mtm
->mtm_tid
;
1505 me
.me_dt
= DT_COMMIT
;
1508 logmap_wait_space(mtm
, ul
, &me
);
1509 ldl_write(ul
, NULL
, (offset_t
)0, &me
);
1510 ldl_round_commit(ul
);
1513 * abort on error; else reset dirty flag
1515 if (ul
->un_flags
& LDL_ERROR
)
1516 logmap_abort(ul
, tid
);
1520 mtm
->mtm_cfrags
= 0;
1523 ldl_push_commit(ul
);
1528 logmap_sethead(mt_map_t
*mtm
, ml_unit_t
*ul
)
1535 * move the head forward so the log knows how full it is
1536 * Make sure to skip any mapentry whose me_lof is 0, these
1537 * are just place holders for DT_CANCELED freed user blocks
1538 * for the current moby.
1540 mutex_enter(&ul
->un_log_mutex
);
1541 mutex_enter(&mtm
->mtm_mutex
);
1543 while (me
!= (mapentry_t
*)mtm
&& me
->me_lof
== 0) {
1547 if (me
== (mapentry_t
*)mtm
)
1553 mutex_exit(&mtm
->mtm_mutex
);
1554 ldl_sethead(ul
, lof
, tid
);
1557 mutex_exit(&ul
->un_log_mutex
);
1561 logmap_settail(mt_map_t
*mtm
, ml_unit_t
*ul
)
1567 * set the tail after the logmap_abort
1569 mutex_enter(&ul
->un_log_mutex
);
1570 mutex_enter(&mtm
->mtm_mutex
);
1571 if (mtm
->mtm_prev
== (mapentry_t
*)mtm
)
1575 * set the tail to the end of the last commit
1577 lof
= mtm
->mtm_tail_lof
;
1578 nb
= mtm
->mtm_tail_nb
;
1580 mutex_exit(&mtm
->mtm_mutex
);
1581 ldl_settail(ul
, lof
, nb
);
1582 mutex_exit(&ul
->un_log_mutex
);
1586 * when reseting a device; roll the log until every
1587 * delta has been rolled forward
1590 logmap_roll_dev(ml_unit_t
*ul
)
1592 mt_map_t
*mtm
= ul
->un_logmap
;
1594 ufsvfs_t
*ufsvfsp
= ul
->un_ufsvfs
;
1597 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1598 map_check_linkage(mtm
));
1599 if (ul
->un_flags
& (LDL_ERROR
|LDL_NOROLL
))
1605 mutex_enter(&mtm
->mtm_mutex
);
1606 for (me
= mtm
->mtm_next
; me
!= (mapentry_t
*)mtm
; me
= me
->me_next
) {
1607 if (me
->me_flags
& ME_ROLL
)
1609 if (me
->me_tid
== mtm
->mtm_tid
)
1611 if (me
->me_tid
== mtm
->mtm_committid
)
1617 * found a delta; kick the roll thread
1618 * but only if the thread is running... (jmh)
1620 if (me
!= (mapentry_t
*)mtm
) {
1621 mutex_exit(&mtm
->mtm_mutex
);
1622 logmap_forceroll(mtm
);
1627 * no more deltas, return
1629 mutex_exit(&mtm
->mtm_mutex
);
1630 (void) ufs_putsummaryinfo(ul
->un_dev
, ufsvfsp
, ufsvfsp
->vfs_fs
);
1632 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1633 map_check_linkage(mtm
));
1637 logmap_cancel_delta(ml_unit_t
*ul
, offset_t mof
, int32_t nb
, int metadata
)
1641 mt_map_t
*mtm
= ul
->un_logmap
;
1645 * map has been referenced and is dirty
1653 me
= kmem_cache_alloc(mapentry_cache
, KM_SLEEP
);
1654 bzero(me
, sizeof (mapentry_t
));
1657 * initialize cancel record and put in logmap
1661 me
->me_dt
= DT_CANCEL
;
1662 me
->me_tid
= mtm
->mtm_tid
;
1666 * Write delta to log if this delta is for metadata. If this is not
1667 * metadata it is user data and we are just putting a cancel
1668 * mapentry into the hash to cancel a user block deletion
1669 * in which we do not want the block to be allocated
1670 * within this moby. This cancel entry will prevent the block from
1671 * being allocated within the moby and prevent user data corruption
1672 * if we happen to crash before this moby is committed.
1674 mutex_enter(&ul
->un_log_mutex
);
1676 logmap_wait_space(mtm
, ul
, me
);
1677 ldl_write(ul
, NULL
, (offset_t
)0, me
);
1678 if (ul
->un_flags
& LDL_ERROR
) {
1679 kmem_cache_free(mapentry_cache
, me
);
1680 mutex_exit(&ul
->un_log_mutex
);
1686 * put in hash and on cancel list
1688 mep
= MAP_HASH(mof
, mtm
);
1689 mutex_enter(&mtm
->mtm_mutex
);
1690 me
->me_age
= mtm
->mtm_age
++;
1693 me
->me_next
= (mapentry_t
*)mtm
;
1694 me
->me_prev
= mtm
->mtm_prev
;
1695 mtm
->mtm_prev
->me_next
= me
;
1697 me
->me_cancel
= mtm
->mtm_cancel
;
1698 mtm
->mtm_cancel
= me
;
1703 me
->me_flags
= ME_USER
;
1705 me
->me_flags
|= (ME_HASH
|ME_CANCEL
);
1707 frags
= blkoff(ul
->un_ufsvfs
->vfs_fs
, nb
);
1710 numfrags(ul
->un_ufsvfs
->vfs_fs
, frags
);
1712 mutex_exit(&mtm
->mtm_mutex
);
1714 mutex_exit(&ul
->un_log_mutex
);
1718 * cancel entries in a logmap (entries are freed at EOT)
1721 logmap_cancel(ml_unit_t
*ul
, offset_t mof
, off_t nb
, int metadata
)
1726 mt_map_t
*mtm
= ul
->un_logmap
;
1729 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1730 map_check_linkage(mtm
));
1732 for (hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
1733 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
1737 * Find overlapping metadata entries. Don't search through
1738 * the hash chains if this is user data because it is only
1739 * possible to have overlapping map entries for metadata,
1740 * and the search can become expensive for large files.
1743 mep
= MAP_HASH(mof
, mtm
);
1744 mutex_enter(&mtm
->mtm_mutex
);
1745 for (me
= *mep
; me
; me
= me
->me_hash
) {
1746 if (!DATAoverlapME(mof
, hnb
, me
))
1749 ASSERT(MEwithinDATA(me
, mof
, hnb
));
1751 if ((me
->me_flags
& ME_CANCEL
) == 0) {
1752 me
->me_cancel
= mtm
->mtm_cancel
;
1753 mtm
->mtm_cancel
= me
;
1754 me
->me_flags
|= ME_CANCEL
;
1761 mutex_exit(&mtm
->mtm_mutex
);
1765 * put a cancel record into the log
1767 logmap_cancel_delta(ul
, mof
, hnb
, metadata
);
1770 ASSERT(((mtm
->mtm_debug
& MT_CHECK_MAP
) == 0) ||
1771 map_check_linkage(mtm
));
1775 * check for overlap w/cancel delta
1778 logmap_iscancel(mt_map_t
*mtm
, offset_t mof
, off_t nb
)
1784 mutex_enter(&mtm
->mtm_mutex
);
1785 for (hnb
= 0; nb
; nb
-= hnb
, mof
+= hnb
) {
1786 hnb
= MAPBLOCKSIZE
- (mof
& MAPBLOCKOFF
);
1790 * search for dup entry
1792 mep
= MAP_HASH(mof
, mtm
);
1793 for (me
= *mep
; me
; me
= me
->me_hash
) {
1794 if (((me
->me_flags
& ME_ROLL
) == 0) &&
1795 (me
->me_dt
!= DT_CANCEL
))
1797 if (DATAoverlapME(mof
, hnb
, me
))
1805 mutex_exit(&mtm
->mtm_mutex
);
1809 mutex_exit(&mtm
->mtm_mutex
);
1814 logmap_logscan_add(ml_unit_t
*ul
, struct delta
*dp
, off_t lof
, size_t *nbp
)
1818 mt_map_t
*mtm
= ul
->un_logmap
;
1821 * verify delta header; failure == mediafail
1825 if ((dp
->d_typ
<= DT_NONE
) || (dp
->d_typ
>= DT_MAX
))
1827 if (dp
->d_typ
== DT_COMMIT
) {
1828 if (dp
->d_nb
!= INT32_C(0) && dp
->d_nb
!= INT32_C(-1))
1831 /* length of delta */
1832 if ((dp
->d_nb
< INT32_C(0)) ||
1833 (dp
->d_nb
> INT32_C(MAPBLOCKSIZE
)))
1836 /* offset on master device */
1837 if (dp
->d_mof
< INT64_C(0))
1842 ldl_seterror(ul
, "Error processing ufs log data during scan");
1847 * process commit record
1849 if (dp
->d_typ
== DT_COMMIT
) {
1850 if (mtm
->mtm_dirty
) {
1851 ASSERT(dp
->d_nb
== INT32_C(0));
1852 logmap_free_cancel(mtm
, &mtm
->mtm_cancel
);
1856 mtm
->mtm_committid
= mtm
->mtm_tid
;
1857 ASSERT(((mtm
->mtm_debug
& MT_SCAN
) == 0) ||
1858 logmap_logscan_commit_debug(lof
, mtm
));
1861 * return #bytes to next sector (next delta header)
1863 *nbp
= ldl_logscan_nbcommit(lof
);
1864 mtm
->mtm_tail_lof
= lof
;
1865 mtm
->mtm_tail_nb
= *nbp
;
1870 * add delta to logmap
1872 me
= kmem_cache_alloc(mapentry_cache
, KM_SLEEP
);
1873 bzero(me
, sizeof (mapentry_t
));
1875 me
->me_mof
= dp
->d_mof
;
1876 me
->me_nb
= dp
->d_nb
;
1877 me
->me_tid
= mtm
->mtm_tid
;
1878 me
->me_dt
= dp
->d_typ
;
1880 me
->me_flags
= (ME_LIST
| ME_SCAN
);
1881 logmap_add(ul
, NULL
, 0, me
);
1882 switch (dp
->d_typ
) {
1884 me
->me_flags
|= ME_CANCEL
;
1885 me
->me_cancel
= mtm
->mtm_cancel
;
1886 mtm
->mtm_cancel
= me
;
1889 ASSERT(((mtm
->mtm_debug
& MT_SCAN
) == 0) ||
1890 logmap_logscan_add_debug(dp
, mtm
));
1896 * return #bytes till next delta header
1898 if ((dp
->d_typ
== DT_CANCEL
) || (dp
->d_typ
== DT_ABZERO
))
1906 logmap_logscan(ml_unit_t
*ul
)
1911 mt_map_t
*logmap
= ul
->un_logmap
;
1913 ASSERT(ul
->un_deltamap
->mtm_next
== (mapentry_t
*)ul
->un_deltamap
);
1916 * prepare the log for a logscan
1918 ldl_logscan_begin(ul
);
1921 * prepare the logmap for a logscan
1923 (void) map_free_entries(logmap
);
1924 logmap
->mtm_tid
= 0;
1925 logmap
->mtm_committid
= UINT32_C(0);
1926 logmap
->mtm_age
= 0;
1927 logmap
->mtm_dirty
= 0;
1928 logmap
->mtm_ref
= 0;
1931 * while not at end of log
1934 * seek to beginning of next delta
1936 lof
= ul
->un_head_lof
;
1937 nbd
= sizeof (delta
);
1938 while (lof
!= ul
->un_tail_lof
) {
1940 /* read delta header */
1941 if (ldl_logscan_read(ul
, &lof
, nbd
, (caddr_t
)&delta
))
1945 if (logmap_logscan_add(ul
, &delta
, lof
, &nb
))
1948 /* seek to next header (skip data) */
1949 if (ldl_logscan_read(ul
, &lof
, nb
, NULL
))
1954 * remove the last partial transaction from the logmap
1956 logmap_abort(ul
, logmap
->mtm_tid
);
1958 ldl_logscan_end(ul
);
1965 * Initialise the mapentry cache. No constructor or deconstructor
1966 * is needed. Also no reclaim function is supplied as reclaiming
1967 * current entries is not possible.
1969 mapentry_cache
= kmem_cache_create("lufs_mapentry_cache",
1970 sizeof (mapentry_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
1974 * Special case when we replace an old map entry which carries quota
1975 * information with a newer entry which does not.
1976 * In that case the push function would not be called to clean up the
1977 * dquot structure. This would be found later by invalidatedq() causing
1978 * a panic when the filesystem in unmounted.
1979 * We clean up the dquot manually before replacing the map entry.
1982 handle_dquot(mapentry_t
*me
)
1988 dqp
= (struct dquot
*)me
->me_arg
;
1991 * We need vfs_dqrwlock to call dqput()
1993 dolock
= (!RW_LOCK_HELD(&dqp
->dq_ufsvfsp
->vfs_dqrwlock
));
1995 rw_enter(&dqp
->dq_ufsvfsp
->vfs_dqrwlock
, RW_READER
);
1997 domutex
= (!MUTEX_HELD(&dqp
->dq_lock
));
1999 mutex_enter(&dqp
->dq_lock
);
2002 * Only clean up if the dquot is referenced
2004 if (dqp
->dq_cnt
== 0) {
2006 mutex_exit(&dqp
->dq_lock
);
2008 rw_exit(&dqp
->dq_ufsvfsp
->vfs_dqrwlock
);
2012 dqp
->dq_flags
&= ~(DQ_MOD
|DQ_TRANS
);
2016 mutex_exit(&dqp
->dq_lock
);
2019 rw_exit(&dqp
->dq_ufsvfsp
->vfs_dqrwlock
);