1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4 * Copyright (c) 2014 Christoph Hellwig.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
15 #include "xfs_bmap_util.h"
16 #include "xfs_alloc.h"
17 #include "xfs_mru_cache.h"
18 #include "xfs_trace.h"
20 #include "xfs_ag_resv.h"
21 #include "xfs_trans.h"
22 #include "xfs_filestream.h"
24 struct xfs_fstrm_item
{
25 struct xfs_mru_cache_elem mru
;
26 struct xfs_perag
*pag
; /* AG in use for this directory */
29 enum xfs_fstrm_alloc
{
30 XFS_PICK_USERDATA
= 1,
31 XFS_PICK_LOWSPACE
= 2,
37 struct xfs_mru_cache_elem
*mru
)
39 struct xfs_fstrm_item
*item
=
40 container_of(mru
, struct xfs_fstrm_item
, mru
);
41 struct xfs_perag
*pag
= item
->pag
;
43 trace_xfs_filestream_free(pag
, mru
->key
);
44 atomic_dec(&pag
->pagf_fstrms
);
51 * Scan the AGs starting at start_agno looking for an AG that isn't in use and
52 * has at least minlen blocks free. If no AG is found to match the allocation
53 * requirements, pick the AG with the most free space in it.
56 xfs_filestream_pick_ag(
57 struct xfs_alloc_arg
*args
,
59 xfs_agnumber_t start_agno
,
61 xfs_extlen_t
*longest
)
63 struct xfs_mount
*mp
= args
->mp
;
64 struct xfs_perag
*pag
;
65 struct xfs_perag
*max_pag
= NULL
;
66 xfs_extlen_t minlen
= *longest
;
67 xfs_extlen_t minfree
, maxfree
= 0;
69 bool first_pass
= true;
71 /* 2% of an AG's blocks must be free for it to be chosen. */
72 minfree
= mp
->m_sb
.sb_agblocks
/ 50;
75 for_each_perag_wrap(mp
, start_agno
, agno
, pag
) {
78 trace_xfs_filestream_scan(pag
, pino
);
81 err
= xfs_bmap_longest_free_extent(pag
, NULL
, longest
);
84 /* Couldn't lock the AGF, skip this AG. */
90 xfs_perag_rele(max_pag
);
94 /* Keep track of the AG with the most free blocks. */
95 if (pag
->pagf_freeblks
> maxfree
) {
96 maxfree
= pag
->pagf_freeblks
;
98 xfs_perag_rele(max_pag
);
99 atomic_inc(&pag_group(pag
)->xg_active_ref
);
104 * The AG reference count does two things: it enforces mutual
105 * exclusion when examining the suitability of an AG in this
106 * loop, and it guards against two filestreams being established
107 * in the same AG as each other.
109 if (atomic_inc_return(&pag
->pagf_fstrms
) <= 1) {
110 if (((minlen
&& *longest
>= minlen
) ||
111 (!minlen
&& pag
->pagf_freeblks
>= minfree
)) &&
112 (!xfs_perag_prefers_metadata(pag
) ||
113 !(flags
& XFS_PICK_USERDATA
) ||
114 (flags
& XFS_PICK_LOWSPACE
))) {
115 /* Break out, retaining the reference on the AG. */
117 xfs_perag_rele(max_pag
);
122 /* Drop the reference on this AG, it's not usable. */
123 atomic_dec(&pag
->pagf_fstrms
);
127 * Allow a second pass to give xfs_bmap_longest_free_extent() another
128 * attempt at locking AGFs that it might have skipped over before we
137 * We must be low on data space, so run a final lowspace optimised
138 * selection pass if we haven't already.
140 if (!(flags
& XFS_PICK_LOWSPACE
)) {
141 flags
|= XFS_PICK_LOWSPACE
;
146 * No unassociated AGs are available, so select the AG with the most
147 * free space, regardless of whether it's already in use by another
148 * filestream. If none suit, just use whatever AG we can grab.
151 for_each_perag_wrap(args
->mp
, 0, start_agno
, pag
) {
156 /* Bail if there are no AGs at all to select from. */
162 atomic_inc(&pag
->pagf_fstrms
);
164 trace_xfs_filestream_pick(pag
, pino
);
169 static struct xfs_inode
*
170 xfs_filestream_get_parent(
171 struct xfs_inode
*ip
)
173 struct inode
*inode
= VFS_I(ip
), *dir
= NULL
;
174 struct dentry
*dentry
, *parent
;
176 dentry
= d_find_alias(inode
);
180 parent
= dget_parent(dentry
);
184 dir
= igrab(d_inode(parent
));
190 return dir
? XFS_I(dir
) : NULL
;
194 * Look up the mru cache for an existing association. If one exists and we can
195 * use it, return with an active perag reference indicating that the allocation
196 * will proceed with that association.
198 * If we have no association, or we cannot use the current one and have to
199 * destroy it, return with longest = 0 to tell the caller to create a new
203 xfs_filestream_lookup_association(
204 struct xfs_bmalloca
*ap
,
205 struct xfs_alloc_arg
*args
,
207 xfs_extlen_t
*longest
)
209 struct xfs_mount
*mp
= args
->mp
;
210 struct xfs_perag
*pag
;
211 struct xfs_mru_cache_elem
*mru
;
215 mru
= xfs_mru_cache_lookup(mp
->m_filestream
, pino
);
219 * Grab the pag and take an extra active reference for the caller whilst
220 * the mru item cannot go away. This means we'll pin the perag with
221 * the reference we get here even if the filestreams association is torn
222 * down immediately after we mark the lookup as done.
224 pag
= container_of(mru
, struct xfs_fstrm_item
, mru
)->pag
;
225 atomic_inc(&pag_group(pag
)->xg_active_ref
);
226 xfs_mru_cache_done(mp
->m_filestream
);
228 trace_xfs_filestream_lookup(pag
, ap
->ip
->i_ino
);
230 ap
->blkno
= xfs_agbno_to_fsb(pag
, 0);
231 xfs_bmap_adjacent(ap
);
234 * If there is very little free space before we start a filestreams
235 * allocation, we're almost guaranteed to fail to find a large enough
236 * free space available so just use the cached AG.
238 if (ap
->tp
->t_flags
& XFS_TRANS_LOWMODE
) {
243 error
= xfs_bmap_longest_free_extent(pag
, args
->tp
, longest
);
244 if (error
== -EAGAIN
)
246 if (error
|| *longest
< args
->maxlen
) {
247 /* We aren't going to use this perag */
259 xfs_filestream_create_association(
260 struct xfs_bmalloca
*ap
,
261 struct xfs_alloc_arg
*args
,
263 xfs_extlen_t
*longest
)
265 struct xfs_mount
*mp
= args
->mp
;
266 struct xfs_mru_cache_elem
*mru
;
267 struct xfs_fstrm_item
*item
;
268 xfs_agnumber_t agno
= XFS_INO_TO_AGNO(mp
, pino
);
272 /* Changing parent AG association now, so remove the existing one. */
273 mru
= xfs_mru_cache_remove(mp
->m_filestream
, pino
);
275 struct xfs_fstrm_item
*item
=
276 container_of(mru
, struct xfs_fstrm_item
, mru
);
278 agno
= (pag_agno(item
->pag
) + 1) % mp
->m_sb
.sb_agcount
;
279 xfs_fstrm_free_func(mp
, mru
);
280 } else if (xfs_is_inode32(mp
)) {
281 xfs_agnumber_t rotorstep
= xfs_rotorstep
;
283 agno
= (mp
->m_agfrotor
/ rotorstep
) % mp
->m_sb
.sb_agcount
;
284 mp
->m_agfrotor
= (mp
->m_agfrotor
+ 1) %
285 (mp
->m_sb
.sb_agcount
* rotorstep
);
288 ap
->blkno
= XFS_AGB_TO_FSB(args
->mp
, agno
, 0);
289 xfs_bmap_adjacent(ap
);
291 if (ap
->datatype
& XFS_ALLOC_USERDATA
)
292 flags
|= XFS_PICK_USERDATA
;
293 if (ap
->tp
->t_flags
& XFS_TRANS_LOWMODE
)
294 flags
|= XFS_PICK_LOWSPACE
;
296 *longest
= ap
->length
;
297 error
= xfs_filestream_pick_ag(args
, pino
, agno
, flags
, longest
);
302 * We are going to use this perag now, so create an association for it.
303 * xfs_filestream_pick_ag() has already bumped the perag fstrms counter
304 * for us, so all we need to do here is take another active reference to
305 * the perag for the cached association.
307 * If we fail to store the association, we need to drop the fstrms
308 * counter as well as drop the perag reference we take here for the
309 * item. We do not need to return an error for this failure - as long as
310 * we return a referenced AG, the allocation can still go ahead just
313 item
= kmalloc(sizeof(*item
), GFP_KERNEL
| __GFP_RETRY_MAYFAIL
);
317 atomic_inc(&pag_group(args
->pag
)->xg_active_ref
);
318 item
->pag
= args
->pag
;
319 error
= xfs_mru_cache_insert(mp
->m_filestream
, pino
, &item
->mru
);
325 xfs_perag_rele(item
->pag
);
328 atomic_dec(&args
->pag
->pagf_fstrms
);
333 * Search for an allocation group with a single extent large enough for
334 * the request. First we look for an existing association and use that if it
335 * is found. Otherwise, we create a new association by selecting an AG that fits
336 * the allocation criteria.
338 * We return with a referenced perag in args->pag to indicate which AG we are
339 * allocating into or an error with no references held.
342 xfs_filestream_select_ag(
343 struct xfs_bmalloca
*ap
,
344 struct xfs_alloc_arg
*args
,
345 xfs_extlen_t
*longest
)
347 struct xfs_inode
*pip
;
352 args
->total
= ap
->total
;
353 pip
= xfs_filestream_get_parent(ap
->ip
);
356 error
= xfs_filestream_lookup_association(ap
, args
, ino
,
361 if (*longest
>= args
->maxlen
)
363 if (ap
->tp
->t_flags
& XFS_TRANS_LOWMODE
)
367 error
= xfs_filestream_create_association(ap
, args
, ino
, longest
);
372 ap
->blkno
= xfs_agbno_to_fsb(args
->pag
, 0);
377 xfs_filestream_deassociate(
378 struct xfs_inode
*ip
)
380 xfs_mru_cache_delete(ip
->i_mount
->m_filestream
, ip
->i_ino
);
384 xfs_filestream_mount(
388 * The filestream timer tunable is currently fixed within the range of
389 * one second to four minutes, with five seconds being the default. The
390 * group count is somewhat arbitrary, but it'd be nice to adhere to the
391 * timer tunable to within about 10 percent. This requires at least 10
394 return xfs_mru_cache_create(&mp
->m_filestream
, mp
,
395 xfs_fstrm_centisecs
* 10, 10, xfs_fstrm_free_func
);
399 xfs_filestream_unmount(
402 xfs_mru_cache_destroy(mp
->m_filestream
);