1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) International Business Machines Corp., 2000-2005
6 * jfs_xtree.c: extent allocation descriptor B+-tree manager
10 #include <linux/module.h>
11 #include <linux/quotaops.h>
12 #include <linux/seq_file.h>
13 #include "jfs_incore.h"
14 #include "jfs_filsys.h"
15 #include "jfs_metapage.h"
17 #include "jfs_dinode.h"
18 #include "jfs_superblock.h"
19 #include "jfs_debug.h"
24 #define XT_INSERT 0x00000001
27 * xtree key/entry comparison: extent offset
30 * -1: k < start of extent
31 * 0: start_of_extent <= k <= end_of_extent
32 * 1: k > end_of_extent
34 #define XT_CMP(CMP, K, X, OFFSET64)\
36 OFFSET64 = offsetXAD(X);\
37 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
38 ((K) < OFFSET64) ? -1 : 0;\
41 /* write a xad entry */
42 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
44 (XAD)->flag = (FLAG);\
45 XADoffset((XAD), (OFF));\
46 XADlength((XAD), (LEN));\
47 XADaddress((XAD), (ADDR));\
50 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
52 /* get page buffer for specified block address */
53 /* ToDo: Replace this ugly macro with a function */
54 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \
56 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \
58 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
59 (le16_to_cpu((P)->header.nextindex) > \
60 le16_to_cpu((P)->header.maxentry)) || \
61 (le16_to_cpu((P)->header.maxentry) > \
62 (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
63 jfs_error((IP)->i_sb, \
64 "XT_GETPAGE: xtree page corrupt\n"); \
73 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
75 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
76 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
77 /* xtree entry parameter descriptor */
85 struct pxdlist
*pxdlist
;
92 #ifdef CONFIG_JFS_STATISTICS
104 static int xtSearch(struct inode
*ip
, s64 xoff
, s64
*next
, int *cmpp
,
105 struct btstack
* btstack
, int flag
);
107 static int xtSplitUp(tid_t tid
,
109 struct xtsplit
* split
, struct btstack
* btstack
);
111 static int xtSplitPage(tid_t tid
, struct inode
*ip
, struct xtsplit
* split
,
112 struct metapage
** rmpp
, s64
* rbnp
);
114 static int xtSplitRoot(tid_t tid
, struct inode
*ip
,
115 struct xtsplit
* split
, struct metapage
** rmpp
);
117 #ifdef _STILL_TO_PORT
118 static int xtDeleteUp(tid_t tid
, struct inode
*ip
, struct metapage
* fmp
,
119 xtpage_t
* fp
, struct btstack
* btstack
);
121 static int xtSearchNode(struct inode
*ip
,
123 int *cmpp
, struct btstack
* btstack
, int flag
);
125 static int xtRelink(tid_t tid
, struct inode
*ip
, xtpage_t
* fp
);
126 #endif /* _STILL_TO_PORT */
131 * function: map a single page into a physical extent;
133 int xtLookup(struct inode
*ip
, s64 lstart
,
134 s64 llen
, int *pflag
, s64
* paddr
, s32
* plen
, int no_check
)
137 struct btstack btstack
;
144 s64 next
, size
, xoff
, xend
;
152 /* is lookup offset beyond eof ? */
153 size
= ((u64
) ip
->i_size
+ (JFS_SBI(ip
->i_sb
)->bsize
- 1)) >>
154 JFS_SBI(ip
->i_sb
)->l2bsize
;
160 * search for the xad entry covering the logical extent
163 if ((rc
= xtSearch(ip
, lstart
, &next
, &cmp
, &btstack
, 0))) {
164 jfs_err("xtLookup: xtSearch returned %d", rc
);
169 * compute the physical extent covering logical extent
171 * N.B. search may have failed (e.g., hole in sparse file),
172 * and returned the index of the next entry.
174 /* retrieve search result */
175 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
177 /* is xad found covering start of logical extent ?
178 * lstart is a page start address,
179 * i.e., lstart cannot start in a hole;
183 *plen
= min(next
- lstart
, llen
);
190 xad
= &p
->xad
[index
];
191 xoff
= offsetXAD(xad
);
192 xlen
= lengthXAD(xad
);
194 xaddr
= addressXAD(xad
);
196 /* initialize new pxd */
198 *paddr
= xaddr
+ (lstart
- xoff
);
199 /* a page must be fully covered by an xad */
200 *plen
= min(xend
- lstart
, llen
);
211 * function: search for the xad entry covering specified offset.
215 * xoff - extent offset;
216 * nextp - address of next extent (if any) for search miss
217 * cmpp - comparison result:
218 * btstack - traverse stack;
219 * flag - search process flag (XT_INSERT);
222 * btstack contains (bn, index) of search path traversed to the entry.
223 * *cmpp is set to result of comparison with the entry returned.
224 * the page containing the entry is pinned at exit.
226 static int xtSearch(struct inode
*ip
, s64 xoff
, s64
*nextp
,
227 int *cmpp
, struct btstack
* btstack
, int flag
)
229 struct jfs_inode_info
*jfs_ip
= JFS_IP(ip
);
231 int cmp
= 1; /* init for empty page */
232 s64 bn
; /* block number */
233 struct metapage
*mp
; /* page buffer */
234 xtpage_t
*p
; /* page */
236 int base
, index
, lim
, btindex
;
237 struct btframe
*btsp
;
238 int nsplit
= 0; /* number of pages to split */
242 INCREMENT(xtStat
.search
);
249 * search down tree from root:
251 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
252 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
254 * if entry with search key K is not found
255 * internal page search find the entry with largest key Ki
256 * less than K which point to the child page to search;
257 * leaf page search find the entry with smallest key Kj
258 * greater than K so that the returned index is the position of
259 * the entry to be shifted right for insertion of new entry.
260 * for empty tree, search key is greater than any key of the tree.
262 * by convention, root bn = 0.
265 /* get/pin the page to search */
266 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
270 /* try sequential access heuristics with the previous
271 * access entry in target leaf page:
272 * once search narrowed down into the target leaf,
273 * key must either match an entry in the leaf or
274 * key entry does not exist in the tree;
277 if ((jfs_ip
->btorder
& BT_SEQUENTIAL
) &&
278 (p
->header
.flag
& BT_LEAF
) &&
279 (index
= jfs_ip
->btindex
) <
280 le16_to_cpu(p
->header
.nextindex
)) {
281 xad
= &p
->xad
[index
];
282 t64
= offsetXAD(xad
);
283 if (xoff
< t64
+ lengthXAD(xad
)) {
289 /* stop sequential access heuristics */
291 } else { /* (t64 + lengthXAD(xad)) <= xoff */
293 /* try next sequential entry */
296 le16_to_cpu(p
->header
.nextindex
)) {
298 t64
= offsetXAD(xad
);
299 if (xoff
< t64
+ lengthXAD(xad
)) {
305 /* miss: key falls between
306 * previous and this entry
313 /* (xoff >= t64 + lengthXAD(xad));
314 * matching entry may be further out:
315 * stop heuristic search
317 /* stop sequential access heuristics */
321 /* (index == p->header.nextindex);
322 * miss: key entry does not exist in
323 * the target leaf/tree
330 * if hit, return index of the entry found, and
331 * if miss, where new entry with search key is
335 /* compute number of pages to split */
336 if (flag
& XT_INSERT
) {
337 if (p
->header
.nextindex
== /* little-endian */
342 btstack
->nsplit
= nsplit
;
345 /* save search result */
351 /* update sequential access heuristics */
352 jfs_ip
->btindex
= index
;
357 INCREMENT(xtStat
.fastSearch
);
361 /* well, ... full search now */
363 lim
= le16_to_cpu(p
->header
.nextindex
) - XTENTRYSTART
;
366 * binary search with search key K on the current page
368 for (base
= XTENTRYSTART
; lim
; lim
>>= 1) {
369 index
= base
+ (lim
>> 1);
371 XT_CMP(cmp
, xoff
, &p
->xad
[index
], t64
);
376 /* search hit - leaf page:
377 * return the entry found
379 if (p
->header
.flag
& BT_LEAF
) {
382 /* compute number of pages to split */
383 if (flag
& XT_INSERT
) {
384 if (p
->header
.nextindex
==
389 btstack
->nsplit
= nsplit
;
392 /* save search result */
398 /* init sequential access heuristics */
399 btindex
= jfs_ip
->btindex
;
400 if (index
== btindex
||
401 index
== btindex
+ 1)
402 jfs_ip
->btorder
= BT_SEQUENTIAL
;
404 jfs_ip
->btorder
= BT_RANDOM
;
405 jfs_ip
->btindex
= index
;
409 /* search hit - internal page:
410 * descend/search its child page
412 if (index
< le16_to_cpu(p
->header
.nextindex
)-1)
413 next
= offsetXAD(&p
->xad
[index
+ 1]);
426 * base is the smallest index with key (Kj) greater than
427 * search key (K) and may be zero or maxentry index.
429 if (base
< le16_to_cpu(p
->header
.nextindex
))
430 next
= offsetXAD(&p
->xad
[base
]);
432 * search miss - leaf page:
434 * return location of entry (base) where new entry with
435 * search key K is to be inserted.
437 if (p
->header
.flag
& BT_LEAF
) {
440 /* compute number of pages to split */
441 if (flag
& XT_INSERT
) {
442 if (p
->header
.nextindex
==
447 btstack
->nsplit
= nsplit
;
450 /* save search result */
456 /* init sequential access heuristics */
457 btindex
= jfs_ip
->btindex
;
458 if (base
== btindex
|| base
== btindex
+ 1)
459 jfs_ip
->btorder
= BT_SEQUENTIAL
;
461 jfs_ip
->btorder
= BT_RANDOM
;
462 jfs_ip
->btindex
= base
;
471 * search miss - non-leaf page:
473 * if base is non-zero, decrement base by one to get the parent
474 * entry of the child page to search.
476 index
= base
? base
- 1 : base
;
479 * go down to child page
482 /* update number of pages to split */
483 if (p
->header
.nextindex
== p
->header
.maxentry
)
488 /* push (bn, index) of the parent page/entry */
489 if (BT_STACK_FULL(btstack
)) {
490 jfs_error(ip
->i_sb
, "stack overrun!\n");
494 BT_PUSH(btstack
, bn
, index
);
496 /* get the child page block number */
497 bn
= addressXAD(&p
->xad
[index
]);
499 /* unpin the parent page */
510 * tid - transaction id;
512 * xflag - extent flag (XAD_NOTRECORDED):
513 * xoff - extent offset;
514 * xlen - extent length;
515 * xaddrp - extent address pointer (in/out):
517 * caller allocated data extent at *xaddrp;
519 * allocate data extent and return its xaddr;
524 int xtInsert(tid_t tid
, /* transaction id */
525 struct inode
*ip
, int xflag
, s64 xoff
, s32 xlen
, s64
* xaddrp
,
530 struct metapage
*mp
; /* meta-page buffer */
531 xtpage_t
*p
; /* base B+-tree index page */
533 int index
, nextindex
;
534 struct btstack btstack
; /* traverse stack */
535 struct xtsplit split
; /* split information */
540 struct xtlock
*xtlck
;
542 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong
) xoff
, xlen
);
545 * search for the entry location at which to insert:
547 * xtFastSearch() and xtSearch() both returns (leaf page
548 * pinned, index at which to insert).
549 * n.b. xtSearch() may return index of maxentry of
552 if ((rc
= xtSearch(ip
, xoff
, &next
, &cmp
, &btstack
, XT_INSERT
)))
555 /* retrieve search result */
556 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
558 /* This test must follow XT_GETSEARCH since mp must be valid if
559 * we branch to out: */
560 if ((cmp
== 0) || (next
&& (xlen
> next
- xoff
))) {
566 * allocate data extent requested
568 * allocation hint: last xad
570 if ((xaddr
= *xaddrp
) == 0) {
571 if (index
> XTENTRYSTART
) {
572 xad
= &p
->xad
[index
- 1];
573 hint
= addressXAD(xad
) + lengthXAD(xad
) - 1;
576 if ((rc
= dquot_alloc_block(ip
, xlen
)))
578 if ((rc
= dbAlloc(ip
, hint
, (s64
) xlen
, &xaddr
))) {
579 dquot_free_block(ip
, xlen
);
585 * insert entry for new extent
590 * if the leaf page is full, split the page and
591 * propagate up the router entry for the new page from split
593 * The xtSplitUp() will insert the entry and unpin the leaf page.
595 nextindex
= le16_to_cpu(p
->header
.nextindex
);
596 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
603 split
.pxdlist
= NULL
;
604 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
))) {
605 /* undo data extent allocation */
607 dbFree(ip
, xaddr
, (s64
) xlen
);
608 dquot_free_block(ip
, xlen
);
618 * insert the new entry into the leaf page
621 * acquire a transaction lock on the leaf page;
623 * action: xad insertion/extension;
625 BT_MARK_DIRTY(mp
, ip
);
627 /* if insert into middle, shift right remaining entries. */
628 if (index
< nextindex
)
629 memmove(&p
->xad
[index
+ 1], &p
->xad
[index
],
630 (nextindex
- index
) * sizeof(xad_t
));
632 /* insert the new entry: mark the entry NEW */
633 xad
= &p
->xad
[index
];
634 XT_PUTENTRY(xad
, xflag
, xoff
, xlen
, xaddr
);
636 /* advance next available entry index */
637 le16_add_cpu(&p
->header
.nextindex
, 1);
639 /* Don't log it if there are no links to the file */
640 if (!test_cflag(COMMIT_Nolink
, ip
)) {
641 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
642 xtlck
= (struct xtlock
*) & tlck
->lock
;
644 (xtlck
->lwm
.offset
) ? min(index
,
645 (int)xtlck
->lwm
.offset
) : index
;
647 le16_to_cpu(p
->header
.nextindex
) - xtlck
->lwm
.offset
;
653 /* unpin the leaf page */
664 * split full pages as propagating insertion up the tree
667 * tid - transaction id;
669 * split - entry parameter descriptor;
670 * btstack - traverse stack from xtSearch()
676 struct inode
*ip
, struct xtsplit
* split
, struct btstack
* btstack
)
679 struct metapage
*smp
;
680 xtpage_t
*sp
; /* split page */
681 struct metapage
*rmp
;
682 s64 rbn
; /* new right page block number */
683 struct metapage
*rcmp
;
684 xtpage_t
*rcp
; /* right child page */
685 s64 rcbn
; /* right child page block number */
686 int skip
; /* index of entry of insertion */
687 int nextindex
; /* next available entry index of p */
688 struct btframe
*parent
; /* parent page entry on traverse stack */
692 int nsplit
; /* number of pages split */
693 struct pxdlist pxdlist
;
696 struct xtlock
*xtlck
;
699 sp
= XT_PAGE(ip
, smp
);
701 /* is inode xtree root extension/inline EA area free ? */
702 if ((sp
->header
.flag
& BT_ROOT
) && (!S_ISDIR(ip
->i_mode
)) &&
703 (le16_to_cpu(sp
->header
.maxentry
) < XTROOTMAXSLOT
) &&
704 (JFS_IP(ip
)->mode2
& INLINEEA
)) {
705 sp
->header
.maxentry
= cpu_to_le16(XTROOTMAXSLOT
);
706 JFS_IP(ip
)->mode2
&= ~INLINEEA
;
708 BT_MARK_DIRTY(smp
, ip
);
710 * acquire a transaction lock on the leaf page;
712 * action: xad insertion/extension;
715 /* if insert into middle, shift right remaining entries. */
717 nextindex
= le16_to_cpu(sp
->header
.nextindex
);
718 if (skip
< nextindex
)
719 memmove(&sp
->xad
[skip
+ 1], &sp
->xad
[skip
],
720 (nextindex
- skip
) * sizeof(xad_t
));
722 /* insert the new entry: mark the entry NEW */
723 xad
= &sp
->xad
[skip
];
724 XT_PUTENTRY(xad
, split
->flag
, split
->off
, split
->len
,
727 /* advance next available entry index */
728 le16_add_cpu(&sp
->header
.nextindex
, 1);
730 /* Don't log it if there are no links to the file */
731 if (!test_cflag(COMMIT_Nolink
, ip
)) {
732 tlck
= txLock(tid
, ip
, smp
, tlckXTREE
| tlckGROW
);
733 xtlck
= (struct xtlock
*) & tlck
->lock
;
734 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
735 min(skip
, (int)xtlck
->lwm
.offset
) : skip
;
737 le16_to_cpu(sp
->header
.nextindex
) -
745 * allocate new index blocks to cover index page split(s)
749 if (split
->pxdlist
== NULL
) {
750 nsplit
= btstack
->nsplit
;
751 split
->pxdlist
= &pxdlist
;
752 pxdlist
.maxnpxd
= pxdlist
.npxd
= 0;
753 pxd
= &pxdlist
.pxd
[0];
754 xlen
= JFS_SBI(ip
->i_sb
)->nbperpage
;
755 for (; nsplit
> 0; nsplit
--, pxd
++) {
756 if ((rc
= dbAlloc(ip
, (s64
) 0, (s64
) xlen
, &xaddr
))
758 PXDaddress(pxd
, xaddr
);
759 PXDlength(pxd
, xlen
);
766 /* undo allocation */
774 * Split leaf page <sp> into <sp> and a new right page <rp>.
776 * The split routines insert the new entry into the leaf page,
777 * and acquire txLock as appropriate.
778 * return <rp> pinned and its block number <rpbn>.
780 rc
= (sp
->header
.flag
& BT_ROOT
) ?
781 xtSplitRoot(tid
, ip
, split
, &rmp
) :
782 xtSplitPage(tid
, ip
, split
, &rmp
, &rbn
);
789 * propagate up the router entry for the leaf page just split
791 * insert a router entry for the new page into the parent page,
792 * propagate the insert/split up the tree by walking back the stack
793 * of (bn of parent page, index of child page entry in parent page)
794 * that were traversed during the search for the page that split.
796 * the propagation of insert/split up the tree stops if the root
797 * splits or the page inserted into doesn't have to split to hold
800 * the parent entry for the split page remains the same, and
801 * a new entry is inserted at its right with the first key and
802 * block number of the new right page.
804 * There are a maximum of 3 pages pinned at any time:
805 * right child, left parent and right parent (when the parent splits)
806 * to keep the child page pinned while working on the parent.
807 * make sure that all pins are released at exit.
809 while ((parent
= BT_POP(btstack
)) != NULL
) {
810 /* parent page specified by stack frame <parent> */
812 /* keep current child pages <rcp> pinned */
815 rcp
= XT_PAGE(ip
, rcmp
);
818 * insert router entry in parent for new right child page <rp>
820 /* get/pin the parent page <sp> */
821 XT_GETPAGE(ip
, parent
->bn
, smp
, PSIZE
, sp
, rc
);
828 * The new key entry goes ONE AFTER the index of parent entry,
829 * because the split was to the right.
831 skip
= parent
->index
+ 1;
834 * split or shift right remaining entries of the parent page
836 nextindex
= le16_to_cpu(sp
->header
.nextindex
);
838 * parent page is full - split the parent page
840 if (nextindex
== le16_to_cpu(sp
->header
.maxentry
)) {
841 /* init for parent page split */
843 split
->index
= skip
; /* index at insert */
844 split
->flag
= XAD_NEW
;
845 split
->off
= offsetXAD(&rcp
->xad
[XTENTRYSTART
]);
846 split
->len
= JFS_SBI(ip
->i_sb
)->nbperpage
;
849 /* unpin previous right child page */
852 /* The split routines insert the new entry,
853 * and acquire txLock as appropriate.
854 * return <rp> pinned and its block number <rpbn>.
856 rc
= (sp
->header
.flag
& BT_ROOT
) ?
857 xtSplitRoot(tid
, ip
, split
, &rmp
) :
858 xtSplitPage(tid
, ip
, split
, &rmp
, &rbn
);
865 /* keep new child page <rp> pinned */
868 * parent page is not full - insert in parent page
872 * insert router entry in parent for the right child
873 * page from the first entry of the right child page:
876 * acquire a transaction lock on the parent page;
878 * action: router xad insertion;
880 BT_MARK_DIRTY(smp
, ip
);
883 * if insert into middle, shift right remaining entries
885 if (skip
< nextindex
)
886 memmove(&sp
->xad
[skip
+ 1], &sp
->xad
[skip
],
888 skip
) << L2XTSLOTSIZE
);
890 /* insert the router entry */
891 xad
= &sp
->xad
[skip
];
892 XT_PUTENTRY(xad
, XAD_NEW
,
893 offsetXAD(&rcp
->xad
[XTENTRYSTART
]),
894 JFS_SBI(ip
->i_sb
)->nbperpage
, rcbn
);
896 /* advance next available entry index. */
897 le16_add_cpu(&sp
->header
.nextindex
, 1);
899 /* Don't log it if there are no links to the file */
900 if (!test_cflag(COMMIT_Nolink
, ip
)) {
901 tlck
= txLock(tid
, ip
, smp
,
902 tlckXTREE
| tlckGROW
);
903 xtlck
= (struct xtlock
*) & tlck
->lock
;
904 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
905 min(skip
, (int)xtlck
->lwm
.offset
) : skip
;
907 le16_to_cpu(sp
->header
.nextindex
) -
911 /* unpin parent page */
914 /* exit propagate up */
919 /* unpin current right page */
930 * split a full non-root page into
931 * original/split/left page and new right page
932 * i.e., the original/split page remains as left page.
937 * struct xtsplit *split,
938 * struct metapage **rmpp,
942 * Pointer to page in which to insert or NULL on error.
945 xtSplitPage(tid_t tid
, struct inode
*ip
,
946 struct xtsplit
* split
, struct metapage
** rmpp
, s64
* rbnp
)
949 struct metapage
*smp
;
951 struct metapage
*rmp
;
952 xtpage_t
*rp
; /* new right page allocated */
953 s64 rbn
; /* new right page block number */
957 int skip
, maxentry
, middle
, righthalf
, n
;
959 struct pxdlist
*pxdlist
;
962 struct xtlock
*sxtlck
= NULL
, *rxtlck
= NULL
;
963 int quota_allocation
= 0;
966 sp
= XT_PAGE(ip
, smp
);
968 INCREMENT(xtStat
.split
);
970 pxdlist
= split
->pxdlist
;
971 pxd
= &pxdlist
->pxd
[pxdlist
->npxd
];
973 rbn
= addressPXD(pxd
);
975 /* Allocate blocks to quota. */
976 rc
= dquot_alloc_block(ip
, lengthPXD(pxd
));
980 quota_allocation
+= lengthPXD(pxd
);
983 * allocate the new right page for the split
985 rmp
= get_metapage(ip
, rbn
, PSIZE
, 1);
991 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip
, smp
, rmp
);
993 BT_MARK_DIRTY(rmp
, ip
);
998 rp
= (xtpage_t
*) rmp
->data
;
999 rp
->header
.self
= *pxd
;
1000 rp
->header
.flag
= sp
->header
.flag
& BT_TYPE
;
1001 rp
->header
.maxentry
= sp
->header
.maxentry
; /* little-endian */
1002 rp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
1004 BT_MARK_DIRTY(smp
, ip
);
1005 /* Don't log it if there are no links to the file */
1006 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1008 * acquire a transaction lock on the new right page;
1010 tlck
= txLock(tid
, ip
, rmp
, tlckXTREE
| tlckNEW
);
1011 rxtlck
= (struct xtlock
*) & tlck
->lock
;
1012 rxtlck
->lwm
.offset
= XTENTRYSTART
;
1014 * acquire a transaction lock on the split page
1016 tlck
= txLock(tid
, ip
, smp
, tlckXTREE
| tlckGROW
);
1017 sxtlck
= (struct xtlock
*) & tlck
->lock
;
1021 * initialize/update sibling pointers of <sp> and <rp>
1023 nextbn
= le64_to_cpu(sp
->header
.next
);
1024 rp
->header
.next
= cpu_to_le64(nextbn
);
1025 rp
->header
.prev
= cpu_to_le64(addressPXD(&sp
->header
.self
));
1026 sp
->header
.next
= cpu_to_le64(rbn
);
1028 skip
= split
->index
;
1031 * sequential append at tail (after last entry of last page)
1033 * if splitting the last page on a level because of appending
1034 * a entry to it (skip is maxentry), it's likely that the access is
1035 * sequential. adding an empty page on the side of the level is less
1036 * work and can push the fill factor much higher than normal.
1037 * if we're wrong it's no big deal - we will do the split the right
1039 * (it may look like it's equally easy to do a similar hack for
1040 * reverse sorted data, that is, split the tree left, but it's not.
1043 if (nextbn
== 0 && skip
== le16_to_cpu(sp
->header
.maxentry
)) {
1045 * acquire a transaction lock on the new/right page;
1047 * action: xad insertion;
1049 /* insert entry at the first entry of the new right page */
1050 xad
= &rp
->xad
[XTENTRYSTART
];
1051 XT_PUTENTRY(xad
, split
->flag
, split
->off
, split
->len
,
1054 rp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
+ 1);
1056 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1057 /* rxtlck->lwm.offset = XTENTRYSTART; */
1058 rxtlck
->lwm
.length
= 1;
1064 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp
, rp
);
1069 * non-sequential insert (at possibly middle page)
1073 * update previous pointer of old next/right page of <sp>
1076 XT_GETPAGE(ip
, nextbn
, mp
, PSIZE
, p
, rc
);
1082 BT_MARK_DIRTY(mp
, ip
);
1084 * acquire a transaction lock on the next page;
1086 * action:sibling pointer update;
1088 if (!test_cflag(COMMIT_Nolink
, ip
))
1089 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckRELINK
);
1091 p
->header
.prev
= cpu_to_le64(rbn
);
1093 /* sibling page may have been updated previously, or
1094 * it may be updated later;
1101 * split the data between the split and new/right pages
1103 maxentry
= le16_to_cpu(sp
->header
.maxentry
);
1104 middle
= maxentry
>> 1;
1105 righthalf
= maxentry
- middle
;
1108 * skip index in old split/left page - insert into left page:
1110 if (skip
<= middle
) {
1111 /* move right half of split page to the new right page */
1112 memmove(&rp
->xad
[XTENTRYSTART
], &sp
->xad
[middle
],
1113 righthalf
<< L2XTSLOTSIZE
);
1115 /* shift right tail of left half to make room for new entry */
1117 memmove(&sp
->xad
[skip
+ 1], &sp
->xad
[skip
],
1118 (middle
- skip
) << L2XTSLOTSIZE
);
1120 /* insert new entry */
1121 xad
= &sp
->xad
[skip
];
1122 XT_PUTENTRY(xad
, split
->flag
, split
->off
, split
->len
,
1125 /* update page header */
1126 sp
->header
.nextindex
= cpu_to_le16(middle
+ 1);
1127 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1128 sxtlck
->lwm
.offset
= (sxtlck
->lwm
.offset
) ?
1129 min(skip
, (int)sxtlck
->lwm
.offset
) : skip
;
1132 rp
->header
.nextindex
=
1133 cpu_to_le16(XTENTRYSTART
+ righthalf
);
1136 * skip index in new right page - insert into right page:
1139 /* move left head of right half to right page */
1141 memmove(&rp
->xad
[XTENTRYSTART
], &sp
->xad
[middle
],
1144 /* insert new entry */
1147 XT_PUTENTRY(xad
, split
->flag
, split
->off
, split
->len
,
1150 /* move right tail of right half to right page */
1151 if (skip
< maxentry
)
1152 memmove(&rp
->xad
[n
+ 1], &sp
->xad
[skip
],
1153 (maxentry
- skip
) << L2XTSLOTSIZE
);
1155 /* update page header */
1156 sp
->header
.nextindex
= cpu_to_le16(middle
);
1157 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1158 sxtlck
->lwm
.offset
= (sxtlck
->lwm
.offset
) ?
1159 min(middle
, (int)sxtlck
->lwm
.offset
) : middle
;
1162 rp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
+
1166 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1167 sxtlck
->lwm
.length
= le16_to_cpu(sp
->header
.nextindex
) -
1170 /* rxtlck->lwm.offset = XTENTRYSTART; */
1171 rxtlck
->lwm
.length
= le16_to_cpu(rp
->header
.nextindex
) -
1178 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp
, rp
);
1183 /* Rollback quota allocation. */
1184 if (quota_allocation
)
1185 dquot_free_block(ip
, quota_allocation
);
1195 * split the full root page into original/root/split page and new
1197 * i.e., root remains fixed in tree anchor (inode) and the root is
1198 * copied to a single new right child page since root page <<
1199 * non-root page, and the split root page contains a single entry
1200 * for the new right child page.
1205 * struct xtsplit *split,
1206 * struct metapage **rmpp)
1209 * Pointer to page in which to insert or NULL on error.
1212 xtSplitRoot(tid_t tid
,
1213 struct inode
*ip
, struct xtsplit
* split
, struct metapage
** rmpp
)
1216 struct metapage
*rmp
;
1219 int skip
, nextindex
;
1222 struct pxdlist
*pxdlist
;
1224 struct xtlock
*xtlck
;
1227 sp
= &JFS_IP(ip
)->i_xtroot
;
1229 INCREMENT(xtStat
.split
);
1232 * allocate a single (right) child page
1234 pxdlist
= split
->pxdlist
;
1235 pxd
= &pxdlist
->pxd
[pxdlist
->npxd
];
1237 rbn
= addressPXD(pxd
);
1238 rmp
= get_metapage(ip
, rbn
, PSIZE
, 1);
1242 /* Allocate blocks to quota. */
1243 rc
= dquot_alloc_block(ip
, lengthPXD(pxd
));
1245 release_metapage(rmp
);
1249 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip
, rmp
);
1252 * acquire a transaction lock on the new right page;
1256 BT_MARK_DIRTY(rmp
, ip
);
1258 rp
= (xtpage_t
*) rmp
->data
;
1260 (sp
->header
.flag
& BT_LEAF
) ? BT_LEAF
: BT_INTERNAL
;
1261 rp
->header
.self
= *pxd
;
1262 rp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
1263 rp
->header
.maxentry
= cpu_to_le16(PSIZE
>> L2XTSLOTSIZE
);
1265 /* initialize sibling pointers */
1266 rp
->header
.next
= 0;
1267 rp
->header
.prev
= 0;
1270 * copy the in-line root page into new right page extent
1272 nextindex
= le16_to_cpu(sp
->header
.maxentry
);
1273 memmove(&rp
->xad
[XTENTRYSTART
], &sp
->xad
[XTENTRYSTART
],
1274 (nextindex
- XTENTRYSTART
) << L2XTSLOTSIZE
);
1277 * insert the new entry into the new right/child page
1278 * (skip index in the new right page will not change)
1280 skip
= split
->index
;
1281 /* if insert into middle, shift right remaining entries */
1282 if (skip
!= nextindex
)
1283 memmove(&rp
->xad
[skip
+ 1], &rp
->xad
[skip
],
1284 (nextindex
- skip
) * sizeof(xad_t
));
1286 xad
= &rp
->xad
[skip
];
1287 XT_PUTENTRY(xad
, split
->flag
, split
->off
, split
->len
, split
->addr
);
1289 /* update page header */
1290 rp
->header
.nextindex
= cpu_to_le16(nextindex
+ 1);
1292 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1293 tlck
= txLock(tid
, ip
, rmp
, tlckXTREE
| tlckNEW
);
1294 xtlck
= (struct xtlock
*) & tlck
->lock
;
1295 xtlck
->lwm
.offset
= XTENTRYSTART
;
1296 xtlck
->lwm
.length
= le16_to_cpu(rp
->header
.nextindex
) -
1303 * init root with the single entry for the new right page
1304 * set the 1st entry offset to 0, which force the left-most key
1305 * at any level of the tree to be less than any search key.
1308 * acquire a transaction lock on the root page (in-memory inode);
1310 * action: root split;
1312 BT_MARK_DIRTY(split
->mp
, ip
);
1314 xad
= &sp
->xad
[XTENTRYSTART
];
1315 XT_PUTENTRY(xad
, XAD_NEW
, 0, JFS_SBI(ip
->i_sb
)->nbperpage
, rbn
);
1317 /* update page header of root */
1318 sp
->header
.flag
&= ~BT_LEAF
;
1319 sp
->header
.flag
|= BT_INTERNAL
;
1321 sp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
+ 1);
1323 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1324 tlck
= txLock(tid
, ip
, split
->mp
, tlckXTREE
| tlckGROW
);
1325 xtlck
= (struct xtlock
*) & tlck
->lock
;
1326 xtlck
->lwm
.offset
= XTENTRYSTART
;
1327 xtlck
->lwm
.length
= 1;
1332 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp
, rp
);
1340 * function: extend in-place;
1342 * note: existing extent may or may not have been committed.
1343 * caller is responsible for pager buffer cache update, and
1344 * working block allocation map update;
1345 * update pmap: alloc whole extended extent;
1347 int xtExtend(tid_t tid
, /* transaction id */
1348 struct inode
*ip
, s64 xoff
, /* delta extent offset */
1349 s32 xlen
, /* delta extent length */
1354 struct metapage
*mp
; /* meta-page buffer */
1355 xtpage_t
*p
; /* base B+-tree index page */
1357 int index
, nextindex
, len
;
1358 struct btstack btstack
; /* traverse stack */
1359 struct xtsplit split
; /* split information */
1363 struct xtlock
*xtlck
= NULL
;
1365 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong
) xoff
, xlen
);
1367 /* there must exist extent to be extended */
1368 if ((rc
= xtSearch(ip
, xoff
- 1, NULL
, &cmp
, &btstack
, XT_INSERT
)))
1371 /* retrieve search result */
1372 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
1376 jfs_error(ip
->i_sb
, "xtSearch did not find extent\n");
1380 /* extension must be contiguous */
1381 xad
= &p
->xad
[index
];
1382 if ((offsetXAD(xad
) + lengthXAD(xad
)) != xoff
) {
1384 jfs_error(ip
->i_sb
, "extension is not contiguous\n");
1389 * acquire a transaction lock on the leaf page;
1391 * action: xad insertion/extension;
1393 BT_MARK_DIRTY(mp
, ip
);
1394 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1395 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
1396 xtlck
= (struct xtlock
*) & tlck
->lock
;
1399 /* extend will overflow extent ? */
1400 xlen
= lengthXAD(xad
) + xlen
;
1401 if ((len
= xlen
- MAXXLEN
) <= 0)
1405 * extent overflow: insert entry for new extent
1408 xoff
= offsetXAD(xad
) + MAXXLEN
;
1409 xaddr
= addressXAD(xad
) + MAXXLEN
;
1410 nextindex
= le16_to_cpu(p
->header
.nextindex
);
1413 * if the leaf page is full, insert the new entry and
1414 * propagate up the router entry for the new page from split
1416 * The xtSplitUp() will insert the entry and unpin the leaf page.
1418 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
1419 /* xtSpliUp() unpins leaf pages */
1421 split
.index
= index
+ 1;
1422 split
.flag
= XAD_NEW
;
1423 split
.off
= xoff
; /* split offset */
1426 split
.pxdlist
= NULL
;
1427 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
)))
1430 /* get back old page */
1431 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1435 * if leaf root has been split, original root has been
1436 * copied to new child page, i.e., original entry now
1437 * resides on the new child page;
1439 if (p
->header
.flag
& BT_INTERNAL
) {
1440 ASSERT(p
->header
.nextindex
==
1441 cpu_to_le16(XTENTRYSTART
+ 1));
1442 xad
= &p
->xad
[XTENTRYSTART
];
1443 bn
= addressXAD(xad
);
1446 /* get new child page */
1447 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1451 BT_MARK_DIRTY(mp
, ip
);
1452 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1453 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
|tlckGROW
);
1454 xtlck
= (struct xtlock
*) & tlck
->lock
;
1459 * insert the new entry into the leaf page
1462 /* insert the new entry: mark the entry NEW */
1463 xad
= &p
->xad
[index
+ 1];
1464 XT_PUTENTRY(xad
, XAD_NEW
, xoff
, len
, xaddr
);
1466 /* advance next available entry index */
1467 le16_add_cpu(&p
->header
.nextindex
, 1);
1470 /* get back old entry */
1471 xad
= &p
->xad
[index
];
1478 XADlength(xad
, xlen
);
1479 if (!(xad
->flag
& XAD_NEW
))
1480 xad
->flag
|= XAD_EXTENDED
;
1482 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1484 (xtlck
->lwm
.offset
) ? min(index
,
1485 (int)xtlck
->lwm
.offset
) : index
;
1487 le16_to_cpu(p
->header
.nextindex
) - xtlck
->lwm
.offset
;
1490 /* unpin the leaf page */
1500 * function: split existing 'tail' extent
1501 * (split offset >= start offset of tail extent), and
1502 * relocate and extend the split tail half;
1504 * note: existing extent may or may not have been committed.
1505 * caller is responsible for pager buffer cache update, and
1506 * working block allocation map update;
1507 * update pmap: free old split tail extent, alloc new extent;
1509 int xtTailgate(tid_t tid
, /* transaction id */
1510 struct inode
*ip
, s64 xoff
, /* split/new extent offset */
1511 s32 xlen
, /* new extent length */
1512 s64 xaddr
, /* new extent address */
1517 struct metapage
*mp
; /* meta-page buffer */
1518 xtpage_t
*p
; /* base B+-tree index page */
1520 int index
, nextindex
, llen
, rlen
;
1521 struct btstack btstack
; /* traverse stack */
1522 struct xtsplit split
; /* split information */
1525 struct xtlock
*xtlck
= 0;
1526 struct tlock
*mtlck
;
1527 struct maplock
*pxdlock
;
1530 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1531 (ulong)xoff, xlen, (ulong)xaddr);
1534 /* there must exist extent to be tailgated */
1535 if ((rc
= xtSearch(ip
, xoff
, NULL
, &cmp
, &btstack
, XT_INSERT
)))
1538 /* retrieve search result */
1539 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
1543 jfs_error(ip
->i_sb
, "couldn't find extent\n");
1547 /* entry found must be last entry */
1548 nextindex
= le16_to_cpu(p
->header
.nextindex
);
1549 if (index
!= nextindex
- 1) {
1551 jfs_error(ip
->i_sb
, "the entry found is not the last entry\n");
1555 BT_MARK_DIRTY(mp
, ip
);
1557 * acquire tlock of the leaf page containing original entry
1559 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1560 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
1561 xtlck
= (struct xtlock
*) & tlck
->lock
;
1564 /* completely replace extent ? */
1565 xad
= &p
->xad
[index
];
1567 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1568 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1570 if ((llen
= xoff
- offsetXAD(xad
)) == 0)
1574 * partially replace extent: insert entry for new extent
1578 * if the leaf page is full, insert the new entry and
1579 * propagate up the router entry for the new page from split
1581 * The xtSplitUp() will insert the entry and unpin the leaf page.
1583 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
1584 /* xtSpliUp() unpins leaf pages */
1586 split
.index
= index
+ 1;
1587 split
.flag
= XAD_NEW
;
1588 split
.off
= xoff
; /* split offset */
1591 split
.pxdlist
= NULL
;
1592 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
)))
1595 /* get back old page */
1596 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1600 * if leaf root has been split, original root has been
1601 * copied to new child page, i.e., original entry now
1602 * resides on the new child page;
1604 if (p
->header
.flag
& BT_INTERNAL
) {
1605 ASSERT(p
->header
.nextindex
==
1606 cpu_to_le16(XTENTRYSTART
+ 1));
1607 xad
= &p
->xad
[XTENTRYSTART
];
1608 bn
= addressXAD(xad
);
1611 /* get new child page */
1612 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1616 BT_MARK_DIRTY(mp
, ip
);
1617 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1618 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
|tlckGROW
);
1619 xtlck
= (struct xtlock
*) & tlck
->lock
;
1624 * insert the new entry into the leaf page
1627 /* insert the new entry: mark the entry NEW */
1628 xad
= &p
->xad
[index
+ 1];
1629 XT_PUTENTRY(xad
, XAD_NEW
, xoff
, xlen
, xaddr
);
1631 /* advance next available entry index */
1632 le16_add_cpu(&p
->header
.nextindex
, 1);
1635 /* get back old XAD */
1636 xad
= &p
->xad
[index
];
1639 * truncate/relocate old extent at split offset
1642 /* update dmap for old/committed/truncated extent */
1643 rlen
= lengthXAD(xad
) - llen
;
1644 if (!(xad
->flag
& XAD_NEW
)) {
1645 /* free from PWMAP at commit */
1646 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1647 mtlck
= txMaplock(tid
, ip
, tlckMAP
);
1648 pxdlock
= (struct maplock
*) & mtlck
->lock
;
1649 pxdlock
->flag
= mlckFREEPXD
;
1650 PXDaddress(&pxdlock
->pxd
, addressXAD(xad
) + llen
);
1651 PXDlength(&pxdlock
->pxd
, rlen
);
1655 /* free from WMAP */
1656 dbFree(ip
, addressXAD(xad
) + llen
, (s64
) rlen
);
1660 XADlength(xad
, llen
);
1663 XT_PUTENTRY(xad
, XAD_NEW
, xoff
, xlen
, xaddr
);
1665 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1666 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
1667 min(index
, (int)xtlck
->lwm
.offset
) : index
;
1668 xtlck
->lwm
.length
= le16_to_cpu(p
->header
.nextindex
) -
1672 /* unpin the leaf page */
1677 #endif /* _NOTYET */
1682 * function: update XAD;
1684 * update extent for allocated_but_not_recorded or
1685 * compressed extent;
1689 * logical extent of the specified XAD must be completely
1690 * contained by an existing XAD;
1692 int xtUpdate(tid_t tid
, struct inode
*ip
, xad_t
* nxad
)
1696 struct metapage
*mp
; /* meta-page buffer */
1697 xtpage_t
*p
; /* base B+-tree index page */
1699 int index0
, index
, newindex
, nextindex
;
1700 struct btstack btstack
; /* traverse stack */
1701 struct xtsplit split
; /* split information */
1702 xad_t
*xad
, *lxad
, *rxad
;
1705 int nxlen
, xlen
, lxlen
, rxlen
;
1708 struct xtlock
*xtlck
= NULL
;
1711 /* there must exist extent to be tailgated */
1712 nxoff
= offsetXAD(nxad
);
1713 nxlen
= lengthXAD(nxad
);
1714 nxaddr
= addressXAD(nxad
);
1716 if ((rc
= xtSearch(ip
, nxoff
, NULL
, &cmp
, &btstack
, XT_INSERT
)))
1719 /* retrieve search result */
1720 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index0
);
1724 jfs_error(ip
->i_sb
, "Could not find extent\n");
1728 BT_MARK_DIRTY(mp
, ip
);
1730 * acquire tlock of the leaf page containing original entry
1732 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1733 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
1734 xtlck
= (struct xtlock
*) & tlck
->lock
;
1737 xad
= &p
->xad
[index0
];
1739 xoff
= offsetXAD(xad
);
1740 xlen
= lengthXAD(xad
);
1741 xaddr
= addressXAD(xad
);
1743 /* nXAD must be completely contained within XAD */
1744 if ((xoff
> nxoff
) ||
1745 (nxoff
+ nxlen
> xoff
+ xlen
)) {
1748 "nXAD in not completely contained within XAD\n");
1753 newindex
= index
+ 1;
1754 nextindex
= le16_to_cpu(p
->header
.nextindex
);
1756 #ifdef _JFS_WIP_NOCOALESCE
1761 * replace XAD with nXAD
1763 replace
: /* (nxoff == xoff) */
1764 if (nxlen
== xlen
) {
1765 /* replace XAD with nXAD:recorded */
1767 xad
->flag
= xflag
& ~XAD_NOTRECORDED
;
1770 } else /* (nxlen < xlen) */
1772 #endif /* _JFS_WIP_NOCOALESCE */
1774 /* #ifdef _JFS_WIP_COALESCE */
1779 * coalesce with left XAD
1781 //coalesceLeft: /* (xoff == nxoff) */
1782 /* is XAD first entry of page ? */
1783 if (index
== XTENTRYSTART
)
1786 /* is nXAD logically and physically contiguous with lXAD ? */
1787 lxad
= &p
->xad
[index
- 1];
1788 lxlen
= lengthXAD(lxad
);
1789 if (!(lxad
->flag
& XAD_NOTRECORDED
) &&
1790 (nxoff
== offsetXAD(lxad
) + lxlen
) &&
1791 (nxaddr
== addressXAD(lxad
) + lxlen
) &&
1792 (lxlen
+ nxlen
< MAXXLEN
)) {
1793 /* extend right lXAD */
1795 XADlength(lxad
, lxlen
+ nxlen
);
1797 /* If we just merged two extents together, need to make sure the
1798 * right extent gets logged. If the left one is marked XAD_NEW,
1799 * then we know it will be logged. Otherwise, mark as
1802 if (!(lxad
->flag
& XAD_NEW
))
1803 lxad
->flag
|= XAD_EXTENDED
;
1807 XADoffset(xad
, xoff
+ nxlen
);
1808 XADlength(xad
, xlen
- nxlen
);
1809 XADaddress(xad
, xaddr
+ nxlen
);
1811 } else { /* (xlen == nxlen) */
1814 if (index
< nextindex
- 1)
1815 memmove(&p
->xad
[index
], &p
->xad
[index
+ 1],
1816 (nextindex
- index
-
1817 1) << L2XTSLOTSIZE
);
1819 p
->header
.nextindex
=
1820 cpu_to_le16(le16_to_cpu(p
->header
.nextindex
) -
1824 newindex
= index
+ 1;
1825 nextindex
= le16_to_cpu(p
->header
.nextindex
);
1826 xoff
= nxoff
= offsetXAD(lxad
);
1827 xlen
= nxlen
= lxlen
+ nxlen
;
1828 xaddr
= nxaddr
= addressXAD(lxad
);
1834 * replace XAD with nXAD
1836 replace
: /* (nxoff == xoff) */
1837 if (nxlen
== xlen
) {
1838 /* replace XAD with nXAD:recorded */
1840 xad
->flag
= xflag
& ~XAD_NOTRECORDED
;
1843 } else /* (nxlen < xlen) */
1847 * coalesce with right XAD
1849 coalesceRight
: /* (xoff <= nxoff) */
1850 /* is XAD last entry of page ? */
1851 if (newindex
== nextindex
) {
1857 /* is nXAD logically and physically contiguous with rXAD ? */
1858 rxad
= &p
->xad
[index
+ 1];
1859 rxlen
= lengthXAD(rxad
);
1860 if (!(rxad
->flag
& XAD_NOTRECORDED
) &&
1861 (nxoff
+ nxlen
== offsetXAD(rxad
)) &&
1862 (nxaddr
+ nxlen
== addressXAD(rxad
)) &&
1863 (rxlen
+ nxlen
< MAXXLEN
)) {
1864 /* extend left rXAD */
1865 XADoffset(rxad
, nxoff
);
1866 XADlength(rxad
, rxlen
+ nxlen
);
1867 XADaddress(rxad
, nxaddr
);
1869 /* If we just merged two extents together, need to make sure
1870 * the left extent gets logged. If the right one is marked
1871 * XAD_NEW, then we know it will be logged. Otherwise, mark as
1874 if (!(rxad
->flag
& XAD_NEW
))
1875 rxad
->flag
|= XAD_EXTENDED
;
1879 XADlength(xad
, xlen
- nxlen
);
1880 else { /* (xlen == nxlen) */
1883 memmove(&p
->xad
[index
], &p
->xad
[index
+ 1],
1884 (nextindex
- index
- 1) << L2XTSLOTSIZE
);
1886 p
->header
.nextindex
=
1887 cpu_to_le16(le16_to_cpu(p
->header
.nextindex
) -
1892 } else if (xoff
== nxoff
)
1895 if (xoff
>= nxoff
) {
1897 jfs_error(ip
->i_sb
, "xoff >= nxoff\n");
1900 /* #endif _JFS_WIP_COALESCE */
1903 * split XAD into (lXAD, nXAD):
1906 * --|----------XAD----------|--
1909 updateRight
: /* (xoff < nxoff) */
1910 /* truncate old XAD as lXAD:not_recorded */
1911 xad
= &p
->xad
[index
];
1912 XADlength(xad
, nxoff
- xoff
);
1914 /* insert nXAD:recorded */
1915 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
1917 /* xtSpliUp() unpins leaf pages */
1919 split
.index
= newindex
;
1920 split
.flag
= xflag
& ~XAD_NOTRECORDED
;
1923 split
.addr
= nxaddr
;
1924 split
.pxdlist
= NULL
;
1925 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
)))
1928 /* get back old page */
1929 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1933 * if leaf root has been split, original root has been
1934 * copied to new child page, i.e., original entry now
1935 * resides on the new child page;
1937 if (p
->header
.flag
& BT_INTERNAL
) {
1938 ASSERT(p
->header
.nextindex
==
1939 cpu_to_le16(XTENTRYSTART
+ 1));
1940 xad
= &p
->xad
[XTENTRYSTART
];
1941 bn
= addressXAD(xad
);
1944 /* get new child page */
1945 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
1949 BT_MARK_DIRTY(mp
, ip
);
1950 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1951 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
|tlckGROW
);
1952 xtlck
= (struct xtlock
*) & tlck
->lock
;
1955 /* is nXAD on new page ? */
1957 (le16_to_cpu(p
->header
.maxentry
) >> 1)) {
1960 le16_to_cpu(p
->header
.nextindex
) +
1966 /* if insert into middle, shift right remaining entries */
1967 if (newindex
< nextindex
)
1968 memmove(&p
->xad
[newindex
+ 1], &p
->xad
[newindex
],
1969 (nextindex
- newindex
) << L2XTSLOTSIZE
);
1971 /* insert the entry */
1972 xad
= &p
->xad
[newindex
];
1974 xad
->flag
= xflag
& ~XAD_NOTRECORDED
;
1976 /* advance next available entry index. */
1977 p
->header
.nextindex
=
1978 cpu_to_le16(le16_to_cpu(p
->header
.nextindex
) + 1);
1982 * does nXAD force 3-way split ?
1985 * --|----------XAD-------------|--
1986 * |-lXAD-| |-rXAD -|
1988 if (nxoff
+ nxlen
== xoff
+ xlen
)
1991 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
1993 /* close out old page */
1994 if (!test_cflag(COMMIT_Nolink
, ip
)) {
1995 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
1996 min(index0
, (int)xtlck
->lwm
.offset
) : index0
;
1998 le16_to_cpu(p
->header
.nextindex
) -
2002 bn
= le64_to_cpu(p
->header
.next
);
2005 /* get new right page */
2006 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
2010 BT_MARK_DIRTY(mp
, ip
);
2011 if (!test_cflag(COMMIT_Nolink
, ip
)) {
2012 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
2013 xtlck
= (struct xtlock
*) & tlck
->lock
;
2016 index0
= index
= newindex
;
2020 newindex
= index
+ 1;
2021 nextindex
= le16_to_cpu(p
->header
.nextindex
);
2022 xlen
= xlen
- (nxoff
- xoff
);
2026 /* recompute split pages */
2027 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
2030 if ((rc
= xtSearch(ip
, nxoff
, NULL
, &cmp
, &btstack
, XT_INSERT
)))
2033 /* retrieve search result */
2034 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index0
);
2038 jfs_error(ip
->i_sb
, "xtSearch failed\n");
2042 if (index0
!= index
) {
2044 jfs_error(ip
->i_sb
, "unexpected value of index\n");
2050 * split XAD into (nXAD, rXAD)
2053 * --|----------XAD----------|--
2056 updateLeft
: /* (nxoff == xoff) && (nxlen < xlen) */
2057 /* update old XAD with nXAD:recorded */
2058 xad
= &p
->xad
[index
];
2060 xad
->flag
= xflag
& ~XAD_NOTRECORDED
;
2062 /* insert rXAD:not_recorded */
2063 xoff
= xoff
+ nxlen
;
2064 xlen
= xlen
- nxlen
;
2065 xaddr
= xaddr
+ nxlen
;
2066 if (nextindex
== le16_to_cpu(p
->header
.maxentry
)) {
2068 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2070 /* xtSpliUp() unpins leaf pages */
2072 split
.index
= newindex
;
2077 split
.pxdlist
= NULL
;
2078 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
)))
2081 /* get back old page */
2082 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
2087 * if leaf root has been split, original root has been
2088 * copied to new child page, i.e., original entry now
2089 * resides on the new child page;
2091 if (p
->header
.flag
& BT_INTERNAL
) {
2092 ASSERT(p
->header
.nextindex
==
2093 cpu_to_le16(XTENTRYSTART
+ 1));
2094 xad
= &p
->xad
[XTENTRYSTART
];
2095 bn
= addressXAD(xad
);
2098 /* get new child page */
2099 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
2103 BT_MARK_DIRTY(mp
, ip
);
2104 if (!test_cflag(COMMIT_Nolink
, ip
)) {
2105 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
|tlckGROW
);
2106 xtlck
= (struct xtlock
*) & tlck
->lock
;
2110 /* if insert into middle, shift right remaining entries */
2111 if (newindex
< nextindex
)
2112 memmove(&p
->xad
[newindex
+ 1], &p
->xad
[newindex
],
2113 (nextindex
- newindex
) << L2XTSLOTSIZE
);
2115 /* insert the entry */
2116 xad
= &p
->xad
[newindex
];
2117 XT_PUTENTRY(xad
, xflag
, xoff
, xlen
, xaddr
);
2119 /* advance next available entry index. */
2120 p
->header
.nextindex
=
2121 cpu_to_le16(le16_to_cpu(p
->header
.nextindex
) + 1);
2125 if (!test_cflag(COMMIT_Nolink
, ip
)) {
2126 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
2127 min(index0
, (int)xtlck
->lwm
.offset
) : index0
;
2128 xtlck
->lwm
.length
= le16_to_cpu(p
->header
.nextindex
) -
2132 /* unpin the leaf page */
2142 * function: grow in append mode from contiguous region specified ;
2145 * tid - transaction id;
2147 * xflag - extent flag:
2148 * xoff - extent offset;
2149 * maxblocks - max extent length;
2150 * xlen - extent length (in/out);
2151 * xaddrp - extent address pointer (in/out):
2156 int xtAppend(tid_t tid
, /* transaction id */
2157 struct inode
*ip
, int xflag
, s64 xoff
, s32 maxblocks
,
2158 s32
* xlenp
, /* (in/out) */
2159 s64
* xaddrp
, /* (in/out) */
2163 struct metapage
*mp
; /* meta-page buffer */
2164 xtpage_t
*p
; /* base B+-tree index page */
2166 int index
, nextindex
;
2167 struct btstack btstack
; /* traverse stack */
2168 struct xtsplit split
; /* split information */
2172 struct xtlock
*xtlck
;
2173 int nsplit
, nblocks
, xlen
;
2174 struct pxdlist pxdlist
;
2180 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
2181 (ulong
) xoff
, maxblocks
, xlen
, (ulong
) xaddr
);
2184 * search for the entry location at which to insert:
2186 * xtFastSearch() and xtSearch() both returns (leaf page
2187 * pinned, index at which to insert).
2188 * n.b. xtSearch() may return index of maxentry of
2191 if ((rc
= xtSearch(ip
, xoff
, &next
, &cmp
, &btstack
, XT_INSERT
)))
2194 /* retrieve search result */
2195 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
2203 xlen
= min(xlen
, (int)(next
- xoff
));
2206 * insert entry for new extent
2211 * if the leaf page is full, split the page and
2212 * propagate up the router entry for the new page from split
2214 * The xtSplitUp() will insert the entry and unpin the leaf page.
2216 nextindex
= le16_to_cpu(p
->header
.nextindex
);
2217 if (nextindex
< le16_to_cpu(p
->header
.maxentry
))
2221 * allocate new index blocks to cover index page split(s)
2223 nsplit
= btstack
.nsplit
;
2224 split
.pxdlist
= &pxdlist
;
2225 pxdlist
.maxnpxd
= pxdlist
.npxd
= 0;
2226 pxd
= &pxdlist
.pxd
[0];
2227 nblocks
= JFS_SBI(ip
->i_sb
)->nbperpage
;
2228 for (; nsplit
> 0; nsplit
--, pxd
++, xaddr
+= nblocks
, maxblocks
-= nblocks
) {
2229 if ((rc
= dbAllocBottomUp(ip
, xaddr
, (s64
) nblocks
)) == 0) {
2230 PXDaddress(pxd
, xaddr
);
2231 PXDlength(pxd
, nblocks
);
2238 /* undo allocation */
2243 xlen
= min(xlen
, maxblocks
);
2246 * allocate data extent requested
2248 if ((rc
= dbAllocBottomUp(ip
, xaddr
, (s64
) xlen
)))
2252 split
.index
= index
;
2257 if ((rc
= xtSplitUp(tid
, ip
, &split
, &btstack
))) {
2258 /* undo data extent allocation */
2259 dbFree(ip
, *xaddrp
, (s64
) * xlenp
);
2269 * insert the new entry into the leaf page
2273 * allocate data extent requested
2275 if ((rc
= dbAllocBottomUp(ip
, xaddr
, (s64
) xlen
)))
2278 BT_MARK_DIRTY(mp
, ip
);
2280 * acquire a transaction lock on the leaf page;
2282 * action: xad insertion/extension;
2284 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckGROW
);
2285 xtlck
= (struct xtlock
*) & tlck
->lock
;
2287 /* insert the new entry: mark the entry NEW */
2288 xad
= &p
->xad
[index
];
2289 XT_PUTENTRY(xad
, xflag
, xoff
, xlen
, xaddr
);
2291 /* advance next available entry index */
2292 le16_add_cpu(&p
->header
.nextindex
, 1);
2295 (xtlck
->lwm
.offset
) ? min(index
,(int) xtlck
->lwm
.offset
) : index
;
2296 xtlck
->lwm
.length
= le16_to_cpu(p
->header
.nextindex
) -
2303 /* unpin the leaf page */
2308 #ifdef _STILL_TO_PORT
2310 /* - TBD for defragmentaion/reorganization -
2315 * delete the entry with the specified key.
2317 * N.B.: whole extent of the entry is assumed to be deleted.
2322 * ENOENT: if the entry is not found.
2326 int xtDelete(tid_t tid
, struct inode
*ip
, s64 xoff
, s32 xlen
, int flag
)
2329 struct btstack btstack
;
2332 struct metapage
*mp
;
2334 int index
, nextindex
;
2336 struct xtlock
*xtlck
;
2339 * find the matching entry; xtSearch() pins the page
2341 if ((rc
= xtSearch(ip
, xoff
, NULL
, &cmp
, &btstack
, 0)))
2344 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
2346 /* unpin the leaf page */
2352 * delete the entry from the leaf page
2354 nextindex
= le16_to_cpu(p
->header
.nextindex
);
2355 le16_add_cpu(&p
->header
.nextindex
, -1);
2358 * if the leaf page bocome empty, free the page
2360 if (p
->header
.nextindex
== cpu_to_le16(XTENTRYSTART
))
2361 return (xtDeleteUp(tid
, ip
, mp
, p
, &btstack
));
2363 BT_MARK_DIRTY(mp
, ip
);
2365 * acquire a transaction lock on the leaf page;
2367 * action:xad deletion;
2369 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
2370 xtlck
= (struct xtlock
*) & tlck
->lock
;
2372 (xtlck
->lwm
.offset
) ? min(index
, xtlck
->lwm
.offset
) : index
;
2374 /* if delete from middle, shift left/compact the remaining entries */
2375 if (index
< nextindex
- 1)
2376 memmove(&p
->xad
[index
], &p
->xad
[index
+ 1],
2377 (nextindex
- index
- 1) * sizeof(xad_t
));
2385 /* - TBD for defragmentaion/reorganization -
2390 * free empty pages as propagating deletion up the tree
2397 xtDeleteUp(tid_t tid
, struct inode
*ip
,
2398 struct metapage
* fmp
, xtpage_t
* fp
, struct btstack
* btstack
)
2401 struct metapage
*mp
;
2403 int index
, nextindex
;
2406 struct btframe
*parent
;
2408 struct xtlock
*xtlck
;
2411 * keep root leaf page which has become empty
2413 if (fp
->header
.flag
& BT_ROOT
) {
2414 /* keep the root page */
2415 fp
->header
.flag
&= ~BT_INTERNAL
;
2416 fp
->header
.flag
|= BT_LEAF
;
2417 fp
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
2419 /* XT_PUTPAGE(fmp); */
2425 * free non-root leaf page
2427 if ((rc
= xtRelink(tid
, ip
, fp
))) {
2432 xaddr
= addressPXD(&fp
->header
.self
);
2433 xlen
= lengthPXD(&fp
->header
.self
);
2434 /* free the page extent */
2435 dbFree(ip
, xaddr
, (s64
) xlen
);
2437 /* free the buffer page */
2438 discard_metapage(fmp
);
2441 * propagate page deletion up the index tree
2443 * If the delete from the parent page makes it empty,
2444 * continue all the way up the tree.
2445 * stop if the root page is reached (which is never deleted) or
2446 * if the entry deletion does not empty the page.
2448 while ((parent
= BT_POP(btstack
)) != NULL
) {
2449 /* get/pin the parent page <sp> */
2450 XT_GETPAGE(ip
, parent
->bn
, mp
, PSIZE
, p
, rc
);
2454 index
= parent
->index
;
2456 /* delete the entry for the freed child page from parent.
2458 nextindex
= le16_to_cpu(p
->header
.nextindex
);
2461 * the parent has the single entry being deleted:
2462 * free the parent page which has become empty.
2464 if (nextindex
== 1) {
2465 if (p
->header
.flag
& BT_ROOT
) {
2466 /* keep the root page */
2467 p
->header
.flag
&= ~BT_INTERNAL
;
2468 p
->header
.flag
|= BT_LEAF
;
2469 p
->header
.nextindex
=
2470 cpu_to_le16(XTENTRYSTART
);
2472 /* XT_PUTPAGE(mp); */
2476 /* free the parent page */
2477 if ((rc
= xtRelink(tid
, ip
, p
)))
2480 xaddr
= addressPXD(&p
->header
.self
);
2481 /* free the page extent */
2483 (s64
) JFS_SBI(ip
->i_sb
)->nbperpage
);
2485 /* unpin/free the buffer page */
2486 discard_metapage(mp
);
2493 * the parent has other entries remaining:
2494 * delete the router entry from the parent page.
2497 BT_MARK_DIRTY(mp
, ip
);
2499 * acquire a transaction lock on the leaf page;
2501 * action:xad deletion;
2503 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
2504 xtlck
= (struct xtlock
*) & tlck
->lock
;
2506 (xtlck
->lwm
.offset
) ? min(index
,
2510 /* if delete from middle,
2511 * shift left/compact the remaining entries in the page
2513 if (index
< nextindex
- 1)
2514 memmove(&p
->xad
[index
], &p
->xad
[index
+ 1],
2515 (nextindex
- index
-
2516 1) << L2XTSLOTSIZE
);
2518 le16_add_cpu(&p
->header
.nextindex
, -1);
2519 jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
2520 (ulong
) parent
->bn
, index
);
2523 /* unpin the parent page */
2526 /* exit propagation up */
2535 * NAME: xtRelocate()
2537 * FUNCTION: relocate xtpage or data extent of regular file;
2538 * This function is mainly used by defragfs utility.
2540 * NOTE: This routine does not have the logic to handle
2541 * uncommitted allocated extent. The caller should call
2542 * txCommit() to commit all the allocation before call
2546 xtRelocate(tid_t tid
, struct inode
* ip
, xad_t
* oxad
, /* old XAD */
2547 s64 nxaddr
, /* new xaddr */
2549 { /* extent type: XTPAGE or DATAEXT */
2551 struct tblock
*tblk
;
2553 struct xtlock
*xtlck
;
2554 struct metapage
*mp
, *pmp
, *lmp
, *rmp
; /* meta-page buffer */
2555 xtpage_t
*p
, *pp
, *rp
, *lp
; /* base B+-tree index page */
2560 s64 oxaddr
, sxaddr
, dxaddr
, nextbn
, prevbn
;
2562 s64 offset
, nbytes
, nbrd
, pno
;
2563 int nb
, npages
, nblks
;
2567 struct pxd_lock
*pxdlock
;
2568 struct btstack btstack
; /* traverse stack */
2570 xtype
= xtype
& EXTENT_TYPE
;
2572 xoff
= offsetXAD(oxad
);
2573 oxaddr
= addressXAD(oxad
);
2574 xlen
= lengthXAD(oxad
);
2576 /* validate extent offset */
2577 offset
= xoff
<< JFS_SBI(ip
->i_sb
)->l2bsize
;
2578 if (offset
>= ip
->i_size
)
2579 return -ESTALE
; /* stale extent */
2581 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
2582 xtype
, (ulong
) xoff
, xlen
, (ulong
) oxaddr
, (ulong
) nxaddr
);
2585 * 1. get and validate the parent xtpage/xad entry
2586 * covering the source extent to be relocated;
2588 if (xtype
== DATAEXT
) {
2589 /* search in leaf entry */
2590 rc
= xtSearch(ip
, xoff
, NULL
, &cmp
, &btstack
, 0);
2594 /* retrieve search result */
2595 XT_GETSEARCH(ip
, btstack
.top
, bn
, pmp
, pp
, index
);
2602 /* validate for exact match with a single entry */
2603 xad
= &pp
->xad
[index
];
2604 if (addressXAD(xad
) != oxaddr
|| lengthXAD(xad
) != xlen
) {
2608 } else { /* (xtype == XTPAGE) */
2610 /* search in internal entry */
2611 rc
= xtSearchNode(ip
, oxad
, &cmp
, &btstack
, 0);
2615 /* retrieve search result */
2616 XT_GETSEARCH(ip
, btstack
.top
, bn
, pmp
, pp
, index
);
2623 /* xtSearchNode() validated for exact match with a single entry
2625 xad
= &pp
->xad
[index
];
2627 jfs_info("xtRelocate: parent xad entry validated.");
2630 * 2. relocate the extent
2632 if (xtype
== DATAEXT
) {
2633 /* if the extent is allocated-but-not-recorded
2634 * there is no real data to be moved in this extent,
2636 if (xad
->flag
& XAD_NOTRECORDED
)
2639 /* release xtpage for cmRead()/xtLookup() */
2645 * copy target data pages to be relocated;
2647 * data extent must start at page boundary and
2648 * multiple of page size (except the last data extent);
2649 * read in each page of the source data extent into cbuf,
2650 * update the cbuf extent descriptor of the page to be
2651 * homeward bound to new dst data extent
2652 * copy the data from the old extent to new extent.
2653 * copy is essential for compressed files to avoid problems
2654 * that can arise if there was a change in compression
2656 * it is a good strategy because it may disrupt cache
2657 * policy to keep the pages in memory afterwards.
2659 offset
= xoff
<< JFS_SBI(ip
->i_sb
)->l2bsize
;
2660 assert((offset
& CM_OFFSET
) == 0);
2661 nbytes
= xlen
<< JFS_SBI(ip
->i_sb
)->l2bsize
;
2662 pno
= offset
>> CM_L2BSIZE
;
2663 npages
= (nbytes
+ (CM_BSIZE
- 1)) >> CM_L2BSIZE
;
2665 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2666 (offset >> CM_L2BSIZE) + 1;
2671 /* process the request one cache buffer at a time */
2672 for (nbrd
= 0; nbrd
< nbytes
; nbrd
+= nb
,
2673 offset
+= nb
, pno
++, npages
--) {
2674 /* compute page size */
2675 nb
= min(nbytes
- nbrd
, CM_BSIZE
);
2677 /* get the cache buffer of the page */
2678 if (rc
= cmRead(ip
, offset
, npages
, &cp
))
2681 assert(addressPXD(&cp
->cm_pxd
) == sxaddr
);
2682 assert(!cp
->cm_modified
);
2684 /* bind buffer with the new extent address */
2685 nblks
= nb
>> JFS_IP(ip
->i_sb
)->l2bsize
;
2686 cmSetXD(ip
, cp
, pno
, dxaddr
, nblks
);
2688 /* release the cbuf, mark it as modified */
2695 /* get back parent page */
2696 if ((rc
= xtSearch(ip
, xoff
, NULL
, &cmp
, &btstack
, 0)))
2699 XT_GETSEARCH(ip
, btstack
.top
, bn
, pmp
, pp
, index
);
2700 jfs_info("xtRelocate: target data extent relocated.");
2701 } else { /* (xtype == XTPAGE) */
2704 * read in the target xtpage from the source extent;
2706 XT_GETPAGE(ip
, oxaddr
, mp
, PSIZE
, p
, rc
);
2713 * read in sibling pages if any to update sibling pointers;
2716 if (p
->header
.next
) {
2717 nextbn
= le64_to_cpu(p
->header
.next
);
2718 XT_GETPAGE(ip
, nextbn
, rmp
, PSIZE
, rp
, rc
);
2727 if (p
->header
.prev
) {
2728 prevbn
= le64_to_cpu(p
->header
.prev
);
2729 XT_GETPAGE(ip
, prevbn
, lmp
, PSIZE
, lp
, rc
);
2739 /* at this point, all xtpages to be updated are in memory */
2742 * update sibling pointers of sibling xtpages if any;
2745 BT_MARK_DIRTY(lmp
, ip
);
2746 tlck
= txLock(tid
, ip
, lmp
, tlckXTREE
| tlckRELINK
);
2747 lp
->header
.next
= cpu_to_le64(nxaddr
);
2752 BT_MARK_DIRTY(rmp
, ip
);
2753 tlck
= txLock(tid
, ip
, rmp
, tlckXTREE
| tlckRELINK
);
2754 rp
->header
.prev
= cpu_to_le64(nxaddr
);
2759 * update the target xtpage to be relocated
2761 * update the self address of the target page
2762 * and write to destination extent;
2763 * redo image covers the whole xtpage since it is new page
2764 * to the destination extent;
2765 * update of bmap for the free of source extent
2766 * of the target xtpage itself:
2767 * update of bmap for the allocation of destination extent
2768 * of the target xtpage itself:
2769 * update of bmap for the extents covered by xad entries in
2770 * the target xtpage is not necessary since they are not
2772 * if not committed before this relocation,
2773 * target page may contain XAD_NEW entries which must
2774 * be scanned for bmap update (logredo() always
2775 * scan xtpage REDOPAGE image for bmap update);
2776 * if committed before this relocation (tlckRELOCATE),
2777 * scan may be skipped by commit() and logredo();
2779 BT_MARK_DIRTY(mp
, ip
);
2780 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
2781 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckNEW
);
2782 xtlck
= (struct xtlock
*) & tlck
->lock
;
2784 /* update the self address in the xtpage header */
2785 pxd
= &p
->header
.self
;
2786 PXDaddress(pxd
, nxaddr
);
2788 /* linelock for the after image of the whole page */
2790 le16_to_cpu(p
->header
.nextindex
) - xtlck
->lwm
.offset
;
2792 /* update the buffer extent descriptor of target xtpage */
2793 xsize
= xlen
<< JFS_SBI(ip
->i_sb
)->l2bsize
;
2794 bmSetXD(mp
, nxaddr
, xsize
);
2796 /* unpin the target page to new homeward bound */
2798 jfs_info("xtRelocate: target xtpage relocated.");
2802 * 3. acquire maplock for the source extent to be freed;
2804 * acquire a maplock saving the src relocated extent address;
2805 * to free of the extent at commit time;
2808 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
2809 * free PXD of the source data extent (logredo() will update
2810 * bmap for free of source data extent), and update bmap for
2811 * free of the source data extent;
2813 if (xtype
== DATAEXT
)
2814 tlck
= txMaplock(tid
, ip
, tlckMAP
);
2815 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
2816 * for the source xtpage (logredo() will init NoRedoPage
2817 * filter and will also update bmap for free of the source
2818 * xtpage), and update bmap for free of the source xtpage;
2819 * N.B. We use tlckMAP instead of tlkcXTREE because there
2820 * is no buffer associated with this lock since the buffer
2821 * has been redirected to the target location.
2823 else /* (xtype == XTPAGE) */
2824 tlck
= txMaplock(tid
, ip
, tlckMAP
| tlckRELOCATE
);
2826 pxdlock
= (struct pxd_lock
*) & tlck
->lock
;
2827 pxdlock
->flag
= mlckFREEPXD
;
2828 PXDaddress(&pxdlock
->pxd
, oxaddr
);
2829 PXDlength(&pxdlock
->pxd
, xlen
);
2833 * 4. update the parent xad entry for relocation;
2835 * acquire tlck for the parent entry with XAD_NEW as entry
2836 * update which will write LOG_REDOPAGE and update bmap for
2837 * allocation of XAD_NEW destination extent;
2839 jfs_info("xtRelocate: update parent xad entry.");
2840 BT_MARK_DIRTY(pmp
, ip
);
2841 tlck
= txLock(tid
, ip
, pmp
, tlckXTREE
| tlckGROW
);
2842 xtlck
= (struct xtlock
*) & tlck
->lock
;
2844 /* update the XAD with the new destination extent; */
2845 xad
= &pp
->xad
[index
];
2846 xad
->flag
|= XAD_NEW
;
2847 XADaddress(xad
, nxaddr
);
2849 xtlck
->lwm
.offset
= min(index
, xtlck
->lwm
.offset
);
2850 xtlck
->lwm
.length
= le16_to_cpu(pp
->header
.nextindex
) -
2853 /* unpin the parent xtpage */
2863 * function: search for the internal xad entry covering specified extent.
2864 * This function is mainly used by defragfs utility.
2868 * xad - extent to find;
2869 * cmpp - comparison result:
2870 * btstack - traverse stack;
2871 * flag - search process flag;
2874 * btstack contains (bn, index) of search path traversed to the entry.
2875 * *cmpp is set to result of comparison with the entry returned.
2876 * the page containing the entry is pinned at exit.
2878 static int xtSearchNode(struct inode
*ip
, xad_t
* xad
, /* required XAD entry */
2879 int *cmpp
, struct btstack
* btstack
, int flag
)
2884 int cmp
= 1; /* init for empty page */
2885 s64 bn
; /* block number */
2886 struct metapage
*mp
; /* meta-page buffer */
2887 xtpage_t
*p
; /* page */
2888 int base
, index
, lim
;
2889 struct btframe
*btsp
;
2894 xoff
= offsetXAD(xad
);
2895 xlen
= lengthXAD(xad
);
2896 xaddr
= addressXAD(xad
);
2899 * search down tree from root:
2901 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
2902 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
2904 * if entry with search key K is not found
2905 * internal page search find the entry with largest key Ki
2906 * less than K which point to the child page to search;
2907 * leaf page search find the entry with smallest key Kj
2908 * greater than K so that the returned index is the position of
2909 * the entry to be shifted right for insertion of new entry.
2910 * for empty tree, search key is greater than any key of the tree.
2912 * by convention, root bn = 0.
2915 /* get/pin the page to search */
2916 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
2919 if (p
->header
.flag
& BT_LEAF
) {
2924 lim
= le16_to_cpu(p
->header
.nextindex
) - XTENTRYSTART
;
2927 * binary search with search key K on the current page
2929 for (base
= XTENTRYSTART
; lim
; lim
>>= 1) {
2930 index
= base
+ (lim
>> 1);
2932 XT_CMP(cmp
, xoff
, &p
->xad
[index
], t64
);
2937 * verify for exact match;
2939 if (xaddr
== addressXAD(&p
->xad
[index
]) &&
2940 xoff
== offsetXAD(&p
->xad
[index
])) {
2943 /* save search result */
2944 btsp
= btstack
->top
;
2946 btsp
->index
= index
;
2952 /* descend/search its child page */
2963 * search miss - non-leaf page:
2965 * base is the smallest index with key (Kj) greater than
2966 * search key (K) and may be zero or maxentry index.
2967 * if base is non-zero, decrement base by one to get the parent
2968 * entry of the child page to search.
2970 index
= base
? base
- 1 : base
;
2973 * go down to child page
2976 /* get the child page block number */
2977 bn
= addressXAD(&p
->xad
[index
]);
2979 /* unpin the parent page */
2989 * link around a freed page.
2998 static int xtRelink(tid_t tid
, struct inode
*ip
, xtpage_t
* p
)
3001 struct metapage
*mp
;
3005 nextbn
= le64_to_cpu(p
->header
.next
);
3006 prevbn
= le64_to_cpu(p
->header
.prev
);
3008 /* update prev pointer of the next page */
3010 XT_GETPAGE(ip
, nextbn
, mp
, PSIZE
, p
, rc
);
3015 * acquire a transaction lock on the page;
3017 * action: update prev pointer;
3019 BT_MARK_DIRTY(mp
, ip
);
3020 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckRELINK
);
3022 /* the page may already have been tlock'd */
3024 p
->header
.prev
= cpu_to_le64(prevbn
);
3029 /* update next pointer of the previous page */
3031 XT_GETPAGE(ip
, prevbn
, mp
, PSIZE
, p
, rc
);
3036 * acquire a transaction lock on the page;
3038 * action: update next pointer;
3040 BT_MARK_DIRTY(mp
, ip
);
3041 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
| tlckRELINK
);
3043 /* the page may already have been tlock'd */
3045 p
->header
.next
= le64_to_cpu(nextbn
);
3052 #endif /* _STILL_TO_PORT */
3058 * initialize file root (inline in inode)
3060 void xtInitRoot(tid_t tid
, struct inode
*ip
)
3065 * acquire a transaction lock on the root
3069 txLock(tid
, ip
, (struct metapage
*) &JFS_IP(ip
)->bxflag
,
3070 tlckXTREE
| tlckNEW
);
3071 p
= &JFS_IP(ip
)->i_xtroot
;
3073 p
->header
.flag
= DXD_INDEX
| BT_ROOT
| BT_LEAF
;
3074 p
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
3076 if (S_ISDIR(ip
->i_mode
))
3077 p
->header
.maxentry
= cpu_to_le16(XTROOTINITSLOT_DIR
);
3079 p
->header
.maxentry
= cpu_to_le16(XTROOTINITSLOT
);
3089 * We can run into a deadlock truncating a file with a large number of
3090 * xtree pages (large fragmented file). A robust fix would entail a
3091 * reservation system where we would reserve a number of metadata pages
3092 * and tlocks which we would be guaranteed without a deadlock. Without
3093 * this, a partial fix is to limit number of metadata pages we will lock
3094 * in a single transaction. Currently we will truncate the file so that
3095 * no more than 50 leaf pages will be locked. The caller of xtTruncate
3096 * will be responsible for ensuring that the current transaction gets
3097 * committed, and that subsequent transactions are created to truncate
3098 * the file further if needed.
3100 #define MAX_TRUNCATE_LEAVES 50
3106 * traverse for truncation logging backward bottom up;
3107 * terminate at the last extent entry at the current subtree
3108 * root page covering new down size.
3109 * truncation may occur within the last extent entry.
3115 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3121 * 1. truncate (non-COMMIT_NOLINK file)
3122 * by jfs_truncate() or jfs_open(O_TRUNC):
3124 * 2. truncate index table of directory when last entry removed
3125 * map update via tlock at commit time;
3127 * Call xtTruncate_pmap instead
3129 * 1. remove (free zero link count) on last reference release
3130 * (pmap has been freed at commit zero link count);
3131 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3133 * map update directly at truncation time;
3136 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3137 * else if (TRUNCATE)
3138 * must write LOG_NOREDOPAGE for deleted index page;
3140 * pages may already have been tlocked by anonymous transactions
3141 * during file growth (i.e., write) before truncation;
3143 * except last truncated entry, deleted entries remains as is
3144 * in the page (nextindex is updated) for other use
3145 * (e.g., log/update allocation map): this avoid copying the page
3146 * info but delay free of pages;
3149 s64
xtTruncate(tid_t tid
, struct inode
*ip
, s64 newsize
, int flag
)
3153 struct metapage
*mp
;
3156 int index
, nextindex
;
3159 int xlen
, len
, freexlen
;
3160 struct btstack btstack
;
3161 struct btframe
*parent
;
3162 struct tblock
*tblk
= NULL
;
3163 struct tlock
*tlck
= NULL
;
3164 struct xtlock
*xtlck
= NULL
;
3165 struct xdlistlock xadlock
; /* maplock for COMMIT_WMAP */
3166 struct pxd_lock
*pxdlock
; /* maplock for COMMIT_WMAP */
3169 int locked_leaves
= 0;
3171 /* save object truncation type */
3173 tblk
= tid_to_tblock(tid
);
3174 tblk
->xflag
|= flag
;
3180 assert(flag
!= COMMIT_PMAP
);
3182 if (flag
== COMMIT_PWMAP
)
3186 xadlock
.flag
= mlckFREEXADLIST
;
3191 * if the newsize is not an integral number of pages,
3192 * the file between newsize and next page boundary will
3194 * if truncating into a file hole, it will cause
3195 * a full block to be allocated for the logical block.
3199 * release page blocks of truncated region <teof, eof>
3201 * free the data blocks from the leaf index blocks.
3202 * delete the parent index entries corresponding to
3203 * the freed child data/index blocks.
3204 * free the index blocks themselves which aren't needed
3205 * in new sized file.
3207 * index blocks are updated only if the blocks are to be
3208 * retained in the new sized file.
3209 * if type is PMAP, the data and index pages are NOT
3210 * freed, and the data and index blocks are NOT freed
3212 * (this will allow continued access of data/index of
3213 * temporary file (zerolink count file truncated to zero-length)).
3215 teof
= (newsize
+ (JFS_SBI(ip
->i_sb
)->bsize
- 1)) >>
3216 JFS_SBI(ip
->i_sb
)->l2bsize
;
3224 * root resides in the inode
3229 * first access of each page:
3232 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
3236 /* process entries backward from last index */
3237 index
= le16_to_cpu(p
->header
.nextindex
) - 1;
3240 /* Since this is the rightmost page at this level, and we may have
3241 * already freed a page that was formerly to the right, let's make
3242 * sure that the next pointer is zero.
3244 if (p
->header
.next
) {
3247 * Make sure this change to the header is logged.
3248 * If we really truncate this leaf, the flag
3249 * will be changed to tlckTRUNCATE
3251 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
|tlckGROW
);
3252 BT_MARK_DIRTY(mp
, ip
);
3256 if (p
->header
.flag
& BT_INTERNAL
)
3264 /* does region covered by leaf page precede Teof ? */
3265 xad
= &p
->xad
[index
];
3266 xoff
= offsetXAD(xad
);
3267 xlen
= lengthXAD(xad
);
3268 if (teof
>= xoff
+ xlen
) {
3273 /* (re)acquire tlock of the leaf page */
3275 if (++locked_leaves
> MAX_TRUNCATE_LEAVES
) {
3277 * We need to limit the size of the transaction
3278 * to avoid exhausting pagecache & tlocks
3281 newsize
= (xoff
+ xlen
) << JFS_SBI(ip
->i_sb
)->l2bsize
;
3284 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
3285 tlck
->type
= tlckXTREE
| tlckTRUNCATE
;
3286 xtlck
= (struct xtlock
*) & tlck
->lock
;
3287 xtlck
->hwm
.offset
= le16_to_cpu(p
->header
.nextindex
) - 1;
3289 BT_MARK_DIRTY(mp
, ip
);
3292 * scan backward leaf page entries
3294 for (; index
>= XTENTRYSTART
; index
--) {
3295 xad
= &p
->xad
[index
];
3296 xoff
= offsetXAD(xad
);
3297 xlen
= lengthXAD(xad
);
3298 xaddr
= addressXAD(xad
);
3301 * The "data" for a directory is indexed by the block
3302 * device's address space. This metadata must be invalidated
3305 if (S_ISDIR(ip
->i_mode
) && (teof
== 0))
3306 invalidate_xad_metapages(ip
, *xad
);
3308 * entry beyond eof: continue scan of current page
3310 * ---|---=======------->
3319 * (xoff <= teof): last entry to be deleted from page;
3320 * If other entries remain in page: keep and update the page.
3324 * eof == entry_start: delete the entry
3326 * -------|=======------->
3333 if (index
== XTENTRYSTART
)
3339 * eof within the entry: truncate the entry.
3341 * -------===|===------->
3344 else if (teof
< xoff
+ xlen
) {
3345 /* update truncated entry */
3347 freexlen
= xlen
- len
;
3348 XADlength(xad
, len
);
3350 /* save pxd of truncated extent in tlck */
3352 if (log
) { /* COMMIT_PWMAP */
3353 xtlck
->lwm
.offset
= (xtlck
->lwm
.offset
) ?
3354 min(index
, (int)xtlck
->lwm
.offset
) : index
;
3355 xtlck
->lwm
.length
= index
+ 1 -
3357 xtlck
->twm
.offset
= index
;
3358 pxdlock
= (struct pxd_lock
*) & xtlck
->pxdlock
;
3359 pxdlock
->flag
= mlckFREEPXD
;
3360 PXDaddress(&pxdlock
->pxd
, xaddr
);
3361 PXDlength(&pxdlock
->pxd
, freexlen
);
3363 /* free truncated extent */
3364 else { /* COMMIT_WMAP */
3366 pxdlock
= (struct pxd_lock
*) & xadlock
;
3367 pxdlock
->flag
= mlckFREEPXD
;
3368 PXDaddress(&pxdlock
->pxd
, xaddr
);
3369 PXDlength(&pxdlock
->pxd
, freexlen
);
3370 txFreeMap(ip
, pxdlock
, NULL
, COMMIT_WMAP
);
3372 /* reset map lock */
3373 xadlock
.flag
= mlckFREEXADLIST
;
3376 /* current entry is new last entry; */
3377 nextindex
= index
+ 1;
3382 * eof beyond the entry:
3384 * -------=======---|--->
3387 else { /* (xoff + xlen < teof) */
3389 nextindex
= index
+ 1;
3392 if (nextindex
< le16_to_cpu(p
->header
.nextindex
)) {
3393 if (!log
) { /* COMMIT_WAMP */
3394 xadlock
.xdlist
= &p
->xad
[nextindex
];
3396 le16_to_cpu(p
->header
.nextindex
) -
3398 txFreeMap(ip
, (struct maplock
*) & xadlock
,
3401 p
->header
.nextindex
= cpu_to_le16(nextindex
);
3406 /* assert(freed == 0); */
3408 } /* end scan of leaf page entries */
3413 * leaf page become empty: free the page if type != PMAP
3415 if (log
) { /* COMMIT_PWMAP */
3416 /* txCommit() with tlckFREE:
3417 * free data extents covered by leaf [XTENTRYSTART:hwm);
3418 * invalidate leaf if COMMIT_PWMAP;
3419 * if (TRUNCATE), will write LOG_NOREDOPAGE;
3421 tlck
->type
= tlckXTREE
| tlckFREE
;
3422 } else { /* COMMIT_WAMP */
3424 /* free data extents covered by leaf */
3425 xadlock
.xdlist
= &p
->xad
[XTENTRYSTART
];
3427 le16_to_cpu(p
->header
.nextindex
) - XTENTRYSTART
;
3428 txFreeMap(ip
, (struct maplock
*) & xadlock
, NULL
, COMMIT_WMAP
);
3431 if (p
->header
.flag
& BT_ROOT
) {
3432 p
->header
.flag
&= ~BT_INTERNAL
;
3433 p
->header
.flag
|= BT_LEAF
;
3434 p
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
3436 XT_PUTPAGE(mp
); /* debug */
3439 if (log
) { /* COMMIT_PWMAP */
3440 /* page will be invalidated at tx completion
3443 } else { /* COMMIT_WMAP */
3446 lid_to_tlock(mp
->lid
)->flag
|= tlckFREELOCK
;
3448 /* invalidate empty leaf page */
3449 discard_metapage(mp
);
3454 * the leaf page become empty: delete the parent entry
3455 * for the leaf page if the parent page is to be kept
3456 * in the new sized file.
3460 * go back up to the parent page
3463 /* pop/restore parent entry for the current child page */
3464 if ((parent
= BT_POP(&btstack
)) == NULL
)
3465 /* current page must have been root */
3468 /* get back the parent page */
3470 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
3474 index
= parent
->index
;
3477 * child page was not empty:
3480 /* has any entry deleted from parent ? */
3481 if (index
< le16_to_cpu(p
->header
.nextindex
) - 1) {
3482 /* (re)acquire tlock on the parent page */
3483 if (log
) { /* COMMIT_PWMAP */
3484 /* txCommit() with tlckTRUNCATE:
3485 * free child extents covered by parent [);
3487 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
3488 xtlck
= (struct xtlock
*) & tlck
->lock
;
3489 if (!(tlck
->type
& tlckTRUNCATE
)) {
3491 le16_to_cpu(p
->header
.
3494 tlckXTREE
| tlckTRUNCATE
;
3496 } else { /* COMMIT_WMAP */
3498 /* free child extents covered by parent */
3499 xadlock
.xdlist
= &p
->xad
[index
+ 1];
3501 le16_to_cpu(p
->header
.nextindex
) -
3503 txFreeMap(ip
, (struct maplock
*) & xadlock
,
3506 BT_MARK_DIRTY(mp
, ip
);
3508 p
->header
.nextindex
= cpu_to_le16(index
+ 1);
3515 * child page was empty:
3517 nfreed
+= lengthXAD(&p
->xad
[index
]);
3520 * During working map update, child page's tlock must be handled
3521 * before parent's. This is because the parent's tlock will cause
3522 * the child's disk space to be marked available in the wmap, so
3523 * it's important that the child page be released by that time.
3525 * ToDo: tlocks should be on doubly-linked list, so we can
3526 * quickly remove it and add it to the end.
3530 * Move parent page's tlock to the end of the tid's tlock list
3532 if (log
&& mp
->lid
&& (tblk
->last
!= mp
->lid
) &&
3533 lid_to_tlock(mp
->lid
)->tid
) {
3534 lid_t lid
= mp
->lid
;
3537 tlck
= lid_to_tlock(lid
);
3539 if (tblk
->next
== lid
)
3540 tblk
->next
= tlck
->next
;
3542 for (prev
= lid_to_tlock(tblk
->next
);
3544 prev
= lid_to_tlock(prev
->next
)) {
3547 prev
->next
= tlck
->next
;
3549 lid_to_tlock(tblk
->last
)->next
= lid
;
3555 * parent page become empty: free the page
3557 if (index
== XTENTRYSTART
) {
3558 if (log
) { /* COMMIT_PWMAP */
3559 /* txCommit() with tlckFREE:
3560 * free child extents covered by parent;
3561 * invalidate parent if COMMIT_PWMAP;
3563 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
3564 xtlck
= (struct xtlock
*) & tlck
->lock
;
3566 le16_to_cpu(p
->header
.nextindex
) - 1;
3567 tlck
->type
= tlckXTREE
| tlckFREE
;
3568 } else { /* COMMIT_WMAP */
3570 /* free child extents covered by parent */
3571 xadlock
.xdlist
= &p
->xad
[XTENTRYSTART
];
3573 le16_to_cpu(p
->header
.nextindex
) -
3575 txFreeMap(ip
, (struct maplock
*) & xadlock
, NULL
,
3578 BT_MARK_DIRTY(mp
, ip
);
3580 if (p
->header
.flag
& BT_ROOT
) {
3581 p
->header
.flag
&= ~BT_INTERNAL
;
3582 p
->header
.flag
|= BT_LEAF
;
3583 p
->header
.nextindex
= cpu_to_le16(XTENTRYSTART
);
3584 if (le16_to_cpu(p
->header
.maxentry
) == XTROOTMAXSLOT
) {
3586 * Shrink root down to allow inline
3587 * EA (otherwise fsck complains)
3589 p
->header
.maxentry
=
3590 cpu_to_le16(XTROOTINITSLOT
);
3591 JFS_IP(ip
)->mode2
|= INLINEEA
;
3594 XT_PUTPAGE(mp
); /* debug */
3597 if (log
) { /* COMMIT_PWMAP */
3598 /* page will be invalidated at tx completion
3601 } else { /* COMMIT_WMAP */
3604 lid_to_tlock(mp
->lid
)->flag
|=
3607 /* invalidate parent page */
3608 discard_metapage(mp
);
3611 /* parent has become empty and freed:
3612 * go back up to its parent page
3619 * parent page still has entries for front region;
3622 /* try truncate region covered by preceding entry
3623 * (process backward)
3627 /* go back down to the child page corresponding
3634 * internal page: go down to child page of current entry
3637 /* save current parent entry for the child page */
3638 if (BT_STACK_FULL(&btstack
)) {
3639 jfs_error(ip
->i_sb
, "stack overrun!\n");
3643 BT_PUSH(&btstack
, bn
, index
);
3645 /* get child page */
3646 xad
= &p
->xad
[index
];
3647 bn
= addressXAD(xad
);
3650 * first access of each internal entry:
3652 /* release parent page */
3655 /* process the child page */
3660 * update file resource stat
3664 if (S_ISDIR(ip
->i_mode
) && !newsize
)
3665 ip
->i_size
= 1; /* fsck hates zero-length directories */
3667 ip
->i_size
= newsize
;
3669 /* update quota allocation to reflect freed blocks */
3670 dquot_free_block(ip
, nfreed
);
3673 * free tlock of invalidated pages
3675 if (flag
== COMMIT_WMAP
)
3686 * Perform truncate to zero length for deleted file, leaving the
3687 * xtree and working map untouched. This allows the file to
3688 * be accessed via open file handles, while the delete of the file
3689 * is committed to disk.
3694 * s64 committed_size)
3696 * return: new committed size
3700 * To avoid deadlock by holding too many transaction locks, the
3701 * truncation may be broken up into multiple transactions.
3702 * The committed_size keeps track of part of the file has been
3703 * freed from the pmaps.
3705 s64
xtTruncate_pmap(tid_t tid
, struct inode
*ip
, s64 committed_size
)
3708 struct btstack btstack
;
3711 int locked_leaves
= 0;
3712 struct metapage
*mp
;
3714 struct btframe
*parent
;
3716 struct tblock
*tblk
;
3717 struct tlock
*tlck
= NULL
;
3721 struct xtlock
*xtlck
= NULL
;
3723 /* save object truncation type */
3724 tblk
= tid_to_tblock(tid
);
3725 tblk
->xflag
|= COMMIT_PMAP
;
3730 if (committed_size
) {
3731 xoff
= (committed_size
>> JFS_SBI(ip
->i_sb
)->l2bsize
) - 1;
3732 rc
= xtSearch(ip
, xoff
, NULL
, &cmp
, &btstack
, 0);
3736 XT_GETSEARCH(ip
, btstack
.top
, bn
, mp
, p
, index
);
3740 jfs_error(ip
->i_sb
, "did not find extent\n");
3747 * root resides in the inode
3752 * first access of each page:
3755 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
3759 /* process entries backward from last index */
3760 index
= le16_to_cpu(p
->header
.nextindex
) - 1;
3762 if (p
->header
.flag
& BT_INTERNAL
)
3770 if (++locked_leaves
> MAX_TRUNCATE_LEAVES
) {
3772 * We need to limit the size of the transaction
3773 * to avoid exhausting pagecache & tlocks
3775 xad
= &p
->xad
[index
];
3776 xoff
= offsetXAD(xad
);
3777 xlen
= lengthXAD(xad
);
3779 return (xoff
+ xlen
) << JFS_SBI(ip
->i_sb
)->l2bsize
;
3781 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
3782 tlck
->type
= tlckXTREE
| tlckFREE
;
3783 xtlck
= (struct xtlock
*) & tlck
->lock
;
3784 xtlck
->hwm
.offset
= index
;
3790 * go back up to the parent page
3793 /* pop/restore parent entry for the current child page */
3794 if ((parent
= BT_POP(&btstack
)) == NULL
)
3795 /* current page must have been root */
3798 /* get back the parent page */
3800 XT_GETPAGE(ip
, bn
, mp
, PSIZE
, p
, rc
);
3804 index
= parent
->index
;
3807 * parent page become empty: free the page
3809 if (index
== XTENTRYSTART
) {
3810 /* txCommit() with tlckFREE:
3811 * free child extents covered by parent;
3812 * invalidate parent if COMMIT_PWMAP;
3814 tlck
= txLock(tid
, ip
, mp
, tlckXTREE
);
3815 xtlck
= (struct xtlock
*) & tlck
->lock
;
3816 xtlck
->hwm
.offset
= le16_to_cpu(p
->header
.nextindex
) - 1;
3817 tlck
->type
= tlckXTREE
| tlckFREE
;
3821 if (p
->header
.flag
& BT_ROOT
) {
3829 * parent page still has entries for front region;
3834 * internal page: go down to child page of current entry
3837 /* save current parent entry for the child page */
3838 if (BT_STACK_FULL(&btstack
)) {
3839 jfs_error(ip
->i_sb
, "stack overrun!\n");
3843 BT_PUSH(&btstack
, bn
, index
);
3845 /* get child page */
3846 xad
= &p
->xad
[index
];
3847 bn
= addressXAD(xad
);
3850 * first access of each internal entry:
3852 /* release parent page */
3855 /* process the child page */
3863 #ifdef CONFIG_JFS_STATISTICS
3864 int jfs_xtstat_proc_show(struct seq_file
*m
, void *v
)
3867 "JFS Xtree statistics\n"
3868 "====================\n"
3870 "fast searches = %d\n"