1 // SPDX-License-Identifier: GPL-2.0
3 * quota.c - CephFS quota
5 * Copyright (C) 2017-2018 SUSE
8 #include <linux/statfs.h>
11 #include "mds_client.h"
/*
 * ceph_adjust_quota_realms_count - adjust the per-client quota realm counter.
 * @inode: inode whose superblock identifies the mds client
 * @inc: direction flag
 *
 * Looks up the mds client from @inode's superblock and applies
 * atomic64_inc() or atomic64_dec() to mdsc->quotarealms_count.
 * NOTE(review): the branch that selects between the two calls is not
 * visible in this extract; presumably it is keyed on @inc -- confirm.
 */
13 void ceph_adjust_quota_realms_count(struct inode
*inode
, bool inc
)
15 struct ceph_mds_client
*mdsc
= ceph_sb_to_mdsc(inode
->i_sb
);
17 atomic64_inc(&mdsc
->quotarealms_count
);
19 atomic64_dec(&mdsc
->quotarealms_count
);
/*
 * ceph_has_realms_with_quotas - cheap pre-check run before walking realms.
 * @inode: inode about to be checked against quotas
 *
 * Tests, in order: whether mdsc->quotarealms_count is positive, whether the
 * mount root is CEPH_INO_ROOT, and whether @inode has a reserved vino.
 * NOTE(review): the value returned after each test is not visible in this
 * extract; the existing comments describe the intent of each branch.
 */
22 static inline bool ceph_has_realms_with_quotas(struct inode
*inode
)
24 struct super_block
*sb
= inode
->i_sb
;
25 struct ceph_mds_client
*mdsc
= ceph_sb_to_mdsc(sb
);
26 struct inode
*root
= d_inode(sb
->s_root
);
/* at least one quota realm is currently counted for this client */
28 if (atomic64_read(&mdsc
->quotarealms_count
) > 0)
30 /* if root is the real CephFS root, we don't have quota realms */
31 if (root
&& ceph_ino(root
) == CEPH_INO_ROOT
)
33 /* MDS stray dirs have no quota realms */
34 if (ceph_vino_is_reserved(ceph_inode(inode
)->i_vino
))
36 /* otherwise, we can't know for sure */
/*
 * ceph_handle_quota - apply a quota update message to the matching inode.
 * @mdsc: mds client the message arrived on
 * @session: mds session the message arrived on
 *
 * The message front is interpreted as a struct ceph_mds_quota. Its recursive
 * statistics (rbytes, rfiles, rsubdirs) and limits (max_bytes, max_files) are
 * copied, converted from little-endian, into the inode found for the head
 * (CEPH_NOSNAP) version of the ino carried in the message.
 * NOTE(review): the remainder of the parameter list (the message argument),
 * several declarations and the error/exit paths are not visible in this
 * extract.
 */
40 void ceph_handle_quota(struct ceph_mds_client
*mdsc
,
41 struct ceph_mds_session
*session
,
44 struct super_block
*sb
= mdsc
->fsc
->sb
;
45 struct ceph_mds_quota
*h
= msg
->front
.iov_base
;
46 struct ceph_client
*cl
= mdsc
->fsc
->client
;
47 struct ceph_vino vino
;
49 struct ceph_inode_info
*ci
;
/* paired with ceph_dec_mds_stopping_blocker() at the end of the function */
51 if (!ceph_inc_mds_stopping_blocker(mdsc
, session
))
/* reject a front buffer too short to hold a struct ceph_mds_quota */
54 if (msg
->front
.iov_len
< sizeof(*h
)) {
55 pr_err_client(cl
, "corrupt message mds%d len %d\n",
56 session
->s_mds
, (int)msg
->front
.iov_len
);
/* look up the head (non-snapshot) inode named by the message */
62 vino
.ino
= le64_to_cpu(h
->ino
);
63 vino
.snap
= CEPH_NOSNAP
;
64 inode
= ceph_find_inode(sb
, vino
);
66 pr_warn_client(cl
, "failed to find inode %llx\n", vino
.ino
);
69 ci
= ceph_inode(inode
);
/* recursive stats and quota limits are updated together under i_ceph_lock */
71 spin_lock(&ci
->i_ceph_lock
);
72 ci
->i_rbytes
= le64_to_cpu(h
->rbytes
);
73 ci
->i_rfiles
= le64_to_cpu(h
->rfiles
);
74 ci
->i_rsubdirs
= le64_to_cpu(h
->rsubdirs
);
75 __ceph_update_quota(ci
, le64_to_cpu(h
->max_bytes
),
76 le64_to_cpu(h
->max_files
));
77 spin_unlock(&ci
->i_ceph_lock
);
81 ceph_dec_mds_stopping_blocker(mdsc
);
/*
 * find_quotarealm_inode - find or create the tracking entry for a realm ino.
 * @mdsc: mds client owning the quotarealms_inodes rb-tree
 * @ino: realm inode number used as the tree key
 *
 * Searches mdsc->quotarealms_inodes under quotarealms_inodes_mutex. When no
 * entry with a matching ino is found, a new one is allocated with
 * kmalloc(GFP_KERNEL) and linked into the tree at the position where the
 * search ended. A warning is logged if that allocation fails.
 * NOTE(review): the loop header, the initialisation of the new entry's
 * fields and the return statement are not visible in this extract.
 */
84 static struct ceph_quotarealm_inode
*
85 find_quotarealm_inode(struct ceph_mds_client
*mdsc
, u64 ino
)
87 struct ceph_quotarealm_inode
*qri
= NULL
;
88 struct rb_node
**node
, *parent
= NULL
;
89 struct ceph_client
*cl
= mdsc
->fsc
->client
;
91 mutex_lock(&mdsc
->quotarealms_inodes_mutex
);
/* start the descent at the root of the tree */
92 node
= &(mdsc
->quotarealms_inodes
.rb_node
);
95 qri
= container_of(*node
, struct ceph_quotarealm_inode
, node
);
98 node
= &((*node
)->rb_left
);
99 else if (ino
> qri
->ino
)
100 node
= &((*node
)->rb_right
);
104 if (!qri
|| (qri
->ino
!= ino
)) {
105 /* Not found, create a new one and insert it */
106 qri
= kmalloc(sizeof(*qri
), GFP_KERNEL
);
111 mutex_init(&qri
->mutex
);
/* link at the slot where the search stopped, then rebalance */
112 rb_link_node(&qri
->node
, parent
, node
);
113 rb_insert_color(&qri
->node
, &mdsc
->quotarealms_inodes
);
115 pr_warn_client(cl
, "Failed to alloc quotarealms_inode\n");
117 mutex_unlock(&mdsc
->quotarealms_inodes_mutex
);
123 * This function will try to look up a realm inode which isn't visible in the
124 * filesystem mountpoint. A list of these kinds of inodes (not visible) is
125 * maintained in the mdsc and freed only when the filesystem is unmounted.
127 * Note that these inodes are kept in this list even if the lookup fails, which
128 * helps to avoid useless lookup requests.
/*
 * lookup_quotarealm_inode - obtain the inode backing a quota realm.
 * @mdsc: mds client
 * @sb: superblock passed to ceph_lookup_inode()
 * @realm: snap realm whose inode (realm->ino) is wanted
 *
 * Uses the per-ino tracking entry returned by find_quotarealm_inode() and
 * serialises on its mutex. Depending on the entry's state the code either
 * finds that a previous request already produced an inode holding caps,
 * notices a recent failed lookup via qri->timeout, refreshes an existing
 * inode with __ceph_do_getattr(), or issues ceph_lookup_inode().
 * NOTE(review): the conditions joining these steps and all return
 * statements are not visible in this extract.
 */
130 static struct inode
*lookup_quotarealm_inode(struct ceph_mds_client
*mdsc
,
131 struct super_block
*sb
,
132 struct ceph_snap_realm
*realm
)
134 struct ceph_client
*cl
= mdsc
->fsc
->client
;
135 struct ceph_quotarealm_inode
*qri
;
138 qri
= find_quotarealm_inode(mdsc
, realm
->ino
);
142 mutex_lock(&qri
->mutex
);
143 if (qri
->inode
&& ceph_is_any_caps(qri
->inode
)) {
144 /* A request has already returned the inode */
145 mutex_unlock(&qri
->mutex
);
148 /* Check if this inode lookup has failed recently */
150 time_before_eq(jiffies
, qri
->timeout
)) {
151 mutex_unlock(&qri
->mutex
);
/* an inode is already attached to the entry: refresh it via getattr */
156 int ret
= __ceph_do_getattr(qri
->inode
, NULL
,
157 CEPH_STAT_CAP_INODE
, true);
163 in
= ceph_lookup_inode(sb
, realm
->ino
);
167 doutc(cl
, "Can't lookup inode %llx (err: %ld)\n", realm
->ino
,
/* record a deadline 60 seconds ahead; it is compared against jiffies above */
169 qri
->timeout
= jiffies
+ msecs_to_jiffies(60 * 1000); /* XXX */
174 mutex_unlock(&qri
->mutex
);
/*
 * ceph_cleanup_quotarealms_inodes - empty the quotarealms_inodes rb-tree.
 * @mdsc: mds client owning the tree
 *
 * Repeatedly takes the first node of mdsc->quotarealms_inodes and erases it
 * until the tree is empty.
 * NOTE(review): whatever releases each erased entry is not visible in this
 * extract. Also, the comment text inside the body says the tree can be
 * cleaned "without holding" quotarealms_inodes_mutex, yet the visible code
 * does take that mutex around the loop -- confirm which is intended.
 */
179 void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client
*mdsc
)
181 struct ceph_quotarealm_inode
*qri
;
182 struct rb_node
*node
;
185 * It should now be safe to clean quotarealms_inode tree without holding
186 * mdsc->quotarealms_inodes_mutex...
188 mutex_lock(&mdsc
->quotarealms_inodes_mutex
);
189 while (!RB_EMPTY_ROOT(&mdsc
->quotarealms_inodes
)) {
190 node
= rb_first(&mdsc
->quotarealms_inodes
);
191 qri
= rb_entry(node
, struct ceph_quotarealm_inode
, node
);
192 rb_erase(node
, &mdsc
->quotarealms_inodes
);
196 mutex_unlock(&mdsc
->quotarealms_inodes_mutex
);
200 * This function walks up the snaprealm hierarchy of an inode and sets
201 * 'realmp' to the first snaprealm that has quotas set (max_files,
202 * max_bytes, or any, depending on the 'which_quota' argument). If the root is
203 * reached, 'realmp' is set to the root ceph_snap_realm instead.
205 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
208 * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
209 * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
210 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
211 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
/*
 * get_quota_realm - walk up from @inode's snap realm looking for a quota.
 * @mdsc: mds client
 * @inode: starting inode
 * @which_quota: selector passed to __ceph_has_quota()
 * @realmp: output pointer for the realm found
 * @retry: see the -EAGAIN description in the comment preceding this function
 *
 * Each step pins the current realm, grabs its inode under
 * inodes_with_caps_lock (falling back to lookup_quotarealm_inode() with
 * snap_rwsem temporarily released), tests it with __ceph_has_quota(), and
 * moves to realm->parent while exchanging the realm references.
 * NOTE(review): the loop construct, the writes to *realmp and the return
 * values are not visible in this extract.
 */
214 static int get_quota_realm(struct ceph_mds_client
*mdsc
, struct inode
*inode
,
215 enum quota_get_realm which_quota
,
216 struct ceph_snap_realm
**realmp
, bool retry
)
218 struct ceph_client
*cl
= mdsc
->fsc
->client
;
219 struct ceph_inode_info
*ci
= NULL
;
220 struct ceph_snap_realm
*realm
, *next
;
226 if (ceph_snap(inode
) != CEPH_NOSNAP
)
230 realm
= ceph_inode(inode
)->i_snap_realm
;
232 ceph_get_snap_realm(mdsc
, realm
);
234 pr_err_ratelimited_client(cl
,
235 "%p %llx.%llx null i_snap_realm\n",
236 inode
, ceph_vinop(inode
));
/* realm->inode is sampled and grabbed under inodes_with_caps_lock */
240 spin_lock(&realm
->inodes_with_caps_lock
);
241 has_inode
= realm
->inode
;
242 in
= has_inode
? igrab(realm
->inode
) : NULL
;
243 spin_unlock(&realm
->inodes_with_caps_lock
);
/* the realm had an inode but igrab() did not return it */
244 if (has_inode
&& !in
)
/* snap_rwsem is released around the lookup and re-taken afterwards */
247 up_read(&mdsc
->snap_rwsem
);
248 in
= lookup_quotarealm_inode(mdsc
, inode
->i_sb
, realm
);
249 down_read(&mdsc
->snap_rwsem
);
250 if (IS_ERR_OR_NULL(in
))
252 ceph_put_snap_realm(mdsc
, realm
);
259 has_quota
= __ceph_has_quota(ci
, which_quota
);
262 next
= realm
->parent
;
263 if (has_quota
|| !next
) {
/* pin the parent before dropping the reference on the current realm */
269 ceph_get_snap_realm(mdsc
, next
);
270 ceph_put_snap_realm(mdsc
, realm
);
274 ceph_put_snap_realm(mdsc
, realm
);
/*
 * ceph_quota_is_same_realm - compare the quota realms of two inodes.
 * @old: first inode
 * @new: second inode
 *
 * Resolves a quota realm for each inode with get_quota_realm(QUOTA_GET_ANY)
 * while holding snap_rwsem for read, and compares the two realm pointers.
 * The first lookup passes retry=true and its return value is not captured;
 * the second passes retry=false and its return value is checked for -EAGAIN.
 * NOTE(review): the restart jump, the NULL checks around the put calls and
 * the return statement are not visible in this extract.
 */
279 bool ceph_quota_is_same_realm(struct inode
*old
, struct inode
*new)
281 struct ceph_mds_client
*mdsc
= ceph_sb_to_mdsc(old
->i_sb
);
282 struct ceph_snap_realm
*old_realm
, *new_realm
;
288 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
289 * However, get_quota_realm may drop it temporarily. By setting the
290 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
291 * dropped and we can then restart the whole operation.
293 down_read(&mdsc
->snap_rwsem
);
294 get_quota_realm(mdsc
, old
, QUOTA_GET_ANY
, &old_realm
, true);
295 ret
= get_quota_realm(mdsc
, new, QUOTA_GET_ANY
, &new_realm
, false);
296 if (ret
== -EAGAIN
) {
/* -EAGAIN path: release the rwsem and the reference taken on old_realm */
297 up_read(&mdsc
->snap_rwsem
);
299 ceph_put_snap_realm(mdsc
, old_realm
);
302 is_same
= (old_realm
== new_realm
);
303 up_read(&mdsc
->snap_rwsem
);
/* drop the realm references obtained from get_quota_realm() */
306 ceph_put_snap_realm(mdsc
, old_realm
);
308 ceph_put_snap_realm(mdsc
, new_realm
);
/* Operation selector passed as the 'op' argument of check_quota_exceeded(). */
313 enum quota_check_op
{
314 QUOTA_CHECK_MAX_FILES_OP
, /* check quota max_files limit */
315 QUOTA_CHECK_MAX_BYTES_OP
, /* check quota max_bytes limit */
316 QUOTA_CHECK_MAX_BYTES_APPROACHING_OP
/* check if quota max_bytes
317 limit is approaching */
321 * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
322 * realm, it will execute quota check operation defined by the 'op' parameter.
323 * The snaprealm walk is interrupted if the quota check detects that the quota
324 * is exceeded or if the root inode is reached.
/*
 * check_quota_exceeded - run one quota check on every realm above @inode.
 * @inode: inode where the walk starts
 * @op: which check to perform (enum quota_check_op)
 *
 * With snap_rwsem held for read, walks from the inode's snap realm towards
 * the parents. For each realm inode the limit and current usage are read
 * under i_ceph_lock: i_max_files against i_rfiles + i_rsubdirs for the files
 * op, i_max_bytes against i_rbytes otherwise. The walk stops once a check
 * reports 'exceeded' or a realm has no parent.
 * NOTE(review): the rest of the parameter list (the 'delta' used below), the
 * loop and switch constructs, and the return statement are not visible in
 * this extract.
 */
326 static bool check_quota_exceeded(struct inode
*inode
, enum quota_check_op op
,
329 struct ceph_mds_client
*mdsc
= ceph_sb_to_mdsc(inode
->i_sb
);
330 struct ceph_client
*cl
= mdsc
->fsc
->client
;
331 struct ceph_inode_info
*ci
;
332 struct ceph_snap_realm
*realm
, *next
;
335 bool exceeded
= false;
337 if (ceph_snap(inode
) != CEPH_NOSNAP
)
340 down_read(&mdsc
->snap_rwsem
);
342 realm
= ceph_inode(inode
)->i_snap_realm
;
344 ceph_get_snap_realm(mdsc
, realm
);
346 pr_err_ratelimited_client(cl
,
347 "%p %llx.%llx null i_snap_realm\n",
348 inode
, ceph_vinop(inode
));
/* realm->inode is sampled and grabbed under inodes_with_caps_lock */
352 spin_lock(&realm
->inodes_with_caps_lock
);
353 has_inode
= realm
->inode
;
354 in
= has_inode
? igrab(realm
->inode
) : NULL
;
355 spin_unlock(&realm
->inodes_with_caps_lock
);
356 if (has_inode
&& !in
)
/* snap_rwsem is released around the lookup and re-taken afterwards */
359 up_read(&mdsc
->snap_rwsem
);
360 in
= lookup_quotarealm_inode(mdsc
, inode
->i_sb
, realm
);
361 down_read(&mdsc
->snap_rwsem
);
362 if (IS_ERR_OR_NULL(in
))
364 ceph_put_snap_realm(mdsc
, realm
);
/* snapshot the limit and the current usage under i_ceph_lock */
368 spin_lock(&ci
->i_ceph_lock
);
369 if (op
== QUOTA_CHECK_MAX_FILES_OP
) {
370 max
= ci
->i_max_files
;
371 rvalue
= ci
->i_rfiles
+ ci
->i_rsubdirs
;
373 max
= ci
->i_max_bytes
;
374 rvalue
= ci
->i_rbytes
;
376 spin_unlock(&ci
->i_ceph_lock
);
/* a zero 'max' never counts as exceeded in the expression below */
378 case QUOTA_CHECK_MAX_FILES_OP
:
379 case QUOTA_CHECK_MAX_BYTES_OP
:
380 exceeded
= (max
&& (rvalue
+ delta
> max
));
382 case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP
:
388 * when we're writing more that 1/16th
389 * of the available space
/* (max - rvalue) >> 4 is one sixteenth of the remaining space */
392 (((max
- rvalue
) >> 4) < delta
);
397 /* Shouldn't happen */
398 pr_warn_client(cl
, "Invalid quota check op (%d)\n", op
);
399 exceeded
= true; /* Just break the loop */
403 next
= realm
->parent
;
404 if (exceeded
|| !next
)
/* pin the parent before dropping the reference on the current realm */
406 ceph_get_snap_realm(mdsc
, next
);
407 ceph_put_snap_realm(mdsc
, realm
);
411 ceph_put_snap_realm(mdsc
, realm
);
412 up_read(&mdsc
->snap_rwsem
);
418 * ceph_quota_is_max_files_exceeded - check if we can create a new file
419 * @inode: directory where a new file is being created
421 * This function returns true if creating a new file would exceed a max_files
422 * quota. It is necessary to walk through the snaprealm hierarchy (until the
423 * FS root) to check all realms with quotas set.
/*
 * Returns the result of check_quota_exceeded() for the max_files check with
 * a delta of 1 (one additional file).
 * NOTE(review): the value returned when ceph_has_realms_with_quotas() is
 * false is not visible in this extract.
 */
425 bool ceph_quota_is_max_files_exceeded(struct inode
*inode
)
427 if (!ceph_has_realms_with_quotas(inode
))
/* warns when @inode is not a directory */
430 WARN_ON(!S_ISDIR(inode
->i_mode
));
432 return check_quota_exceeded(inode
, QUOTA_CHECK_MAX_FILES_OP
, 1);
436 * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
437 * @inode: inode being written
438 * @newsize: new size if write succeeds
440 * This function returns true if growing the file to @newsize would exceed a
441 * max_bytes quota; it returns false otherwise.
/*
 * Returns the result of check_quota_exceeded() for the max_bytes check, with
 * the growth (newsize minus the current i_size) as the delta.
 * NOTE(review): the early-return statements are not visible in this extract.
 */
443 bool ceph_quota_is_max_bytes_exceeded(struct inode
*inode
, loff_t newsize
)
/* current size as reported by i_size_read() */
445 loff_t size
= i_size_read(inode
);
447 if (!ceph_has_realms_with_quotas(inode
))
450 /* return immediately if we're decreasing file size */
454 return check_quota_exceeded(inode
, QUOTA_CHECK_MAX_BYTES_OP
, (newsize
- size
));
458 * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
459 * @inode: inode being written
460 * @newsize: new size if write succeeds
462 * This function returns true if the new file size @newsize will be consuming
463 * more than 1/16th of the available quota space; it returns false otherwise.
/*
 * Returns the result of check_quota_exceeded() for the
 * QUOTA_CHECK_MAX_BYTES_APPROACHING_OP check.
 * NOTE(review): the delta argument of that call and the early-return
 * statements are not visible in this extract.
 */
465 bool ceph_quota_is_max_bytes_approaching(struct inode
*inode
, loff_t newsize
)
/* the baseline here is i_reported_size, not i_size_read() */
467 loff_t size
= ceph_inode(inode
)->i_reported_size
;
469 if (!ceph_has_realms_with_quotas(inode
))
472 /* return immediately if we're decreasing file size */
476 return check_quota_exceeded(inode
, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP
,
481 * ceph_quota_update_statfs - if root has quota update statfs with quota status
482 * @fsc: filesystem client instance
483 * @buf: statfs to update
485 * If the mounted filesystem root has max_bytes quota set, update the filesystem
486 * statistics with the quota status.
488 * This function returns true if the stats have been updated, false otherwise.
/*
 * Resolves the quota realm for the mount root with
 * get_quota_realm(QUOTA_GET_MAX_BYTES) under snap_rwsem, grabs that realm's
 * inode, and, when i_max_bytes is non-zero, derives block counts from
 * i_max_bytes and i_rbytes under i_ceph_lock. The results are stored in
 * @buf (f_blocks, f_bavail and, on the 4K paths, f_frsize).
 * NOTE(review): the conditions selecting the block-size branches, other
 * writes to @buf and the return statement are not visible in this extract.
 */
490 bool ceph_quota_update_statfs(struct ceph_fs_client
*fsc
, struct kstatfs
*buf
)
492 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
493 struct ceph_inode_info
*ci
;
494 struct ceph_snap_realm
*realm
;
496 u64 total
= 0, used
, free
;
497 bool is_updated
= false;
499 down_read(&mdsc
->snap_rwsem
);
500 get_quota_realm(mdsc
, d_inode(fsc
->sb
->s_root
), QUOTA_GET_MAX_BYTES
,
502 up_read(&mdsc
->snap_rwsem
);
/* realm->inode is sampled and grabbed under inodes_with_caps_lock */
506 spin_lock(&realm
->inodes_with_caps_lock
);
507 in
= realm
->inode
? igrab(realm
->inode
) : NULL
;
508 spin_unlock(&realm
->inodes_with_caps_lock
);
511 spin_lock(&ci
->i_ceph_lock
);
512 if (ci
->i_max_bytes
) {
/* byte counts converted to blocks of 1 << CEPH_BLOCK_SHIFT bytes */
513 total
= ci
->i_max_bytes
>> CEPH_BLOCK_SHIFT
;
514 used
= ci
->i_rbytes
>> CEPH_BLOCK_SHIFT
;
515 /* For quota size less than 4MB, use 4KB block size */
517 total
= ci
->i_max_bytes
>> CEPH_4K_BLOCK_SHIFT
;
518 used
= ci
->i_rbytes
>> CEPH_4K_BLOCK_SHIFT
;
519 buf
->f_frsize
= 1 << CEPH_4K_BLOCK_SHIFT
;
521 /* It is possible for a quota to be exceeded.
522 * Report 'zero' in that case
524 free
= total
> used
? total
- used
: 0;
525 /* For quota size less than 4KB, report the
526 * total=used=4KB,free=0 when quota is full
527 * and total=free=4KB, used=0 otherwise */
530 free
= ci
->i_max_bytes
> ci
->i_rbytes
? 1 : 0;
531 buf
->f_frsize
= 1 << CEPH_4K_BLOCK_SHIFT
;
534 spin_unlock(&ci
->i_ceph_lock
);
536 buf
->f_blocks
= total
;
538 buf
->f_bavail
= free
;
/* drop the realm reference obtained from get_quota_realm() */
543 ceph_put_snap_realm(mdsc
, realm
);