4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
41 #pragma ident "%Z%%M% %I% %E% SMI"
44 * Routines used in checking limits on file system usage.
47 #include <sys/types.h>
48 #include <sys/t_lock.h>
49 #include <sys/param.h>
51 #include <sys/systm.h>
53 #include <sys/signal.h>
59 #include <sys/vnode.h>
62 #include <sys/fs/ufs_inode.h>
63 #include <sys/fs/ufs_fs.h>
64 #include <sys/fs/ufs_quota.h>
65 #include <sys/errno.h>
66 #include <sys/cmn_err.h>
67 #include <sys/session.h>
68 #include <sys/debug.h>
71 * Find the dquot structure that should
72 * be used in checking i/o on inode ip.
/*
 * getinoquota() - find the quota record (struct dquot) that governs I/O
 * on inode ip.
 *
 * NOTE(review): this listing is incomplete. The return type, the braces,
 * the return statements and the bodies of several branches are not
 * visible, so the notes below describe only the statements that are.
 *
 * Visible preconditions (ASSERTs): vfs_dqrwlock of the owning ufsvfs is
 * held, ip->i_contents is held as writer, and ip is neither a shadow
 * inode (IFSHAD) nor an extended attribute directory (IFATTRDIR).
 *
 * Visible steps:
 *   1. Test whether MQ_ENABLED is clear in vfs_qflags.
 *   2. Test whether ip is the quota file inode (vfs_qinod).
 *   3. Call getdiskquota() for ip->i_uid; the record comes back through
 *      xdqp and a nonzero return value takes the failure branch.
 *   4. With dq_lock held, assert that the record belongs to ip->i_uid
 *      and test whether all four limits (file/block, hard/soft) are 0.
 *
 * Presumably NULL is returned from the branches in steps 1-3 and when no
 * limits are set, and the record is returned otherwise - TODO confirm
 * against the complete source.
 */
75 getinoquota(struct inode
*ip
)
77 struct dquot
*dqp
, *xdqp
;
78 struct ufsvfs
*ufsvfsp
= ip
->i_ufsvfs
;
/* Caller must hold the quota rwlock and own the inode contents lock. */
80 ASSERT(RW_LOCK_HELD(&ufsvfsp
->vfs_dqrwlock
));
81 ASSERT(RW_WRITE_HELD(&ip
->i_contents
));
83 * Check for quotas enabled.
85 if ((ufsvfsp
->vfs_qflags
& MQ_ENABLED
) == 0) {
90 * Check for someone doing I/O to quota file.
92 if (ip
== ufsvfsp
->vfs_qinod
) {
97 * Check for a legal inode, e.g. not a shadow inode,
98 * not a extended attribute directory inode and a valid mode.
100 ASSERT((ip
->i_mode
& IFMT
) != IFSHAD
);
101 ASSERT((ip
->i_mode
& IFMT
) != IFATTRDIR
);
/* Look up the record for the file owner; nonzero means the lookup failed. */
104 if (getdiskquota((uid_t
)ip
->i_uid
, ufsvfsp
, 0, &xdqp
)) {
/*
 * NOTE(review): the statement that sets dqp (presumably from xdqp) is
 * not visible in this listing - confirm.
 */
108 mutex_enter(&dqp
->dq_lock
);
109 ASSERT(ip
->i_uid
== dqp
->dq_uid
);
/* A record whose four limits are all zero imposes no restriction. */
111 if (dqp
->dq_fhardlimit
== 0 && dqp
->dq_fsoftlimit
== 0 &&
112 dqp
->dq_bhardlimit
== 0 && dqp
->dq_bsoftlimit
== 0) {
/* dq_lock is dropped on both arms of the limits test. */
114 mutex_exit(&dqp
->dq_lock
);
117 mutex_exit(&dqp
->dq_lock
);
123 * Update disk usage, and take corrective action.
/*
 * chkdq() - apply a change in block usage to the quota record of the
 * owner of inode ip, checking the block limits on allocation.
 *
 * NOTE(review): this listing is incomplete. Local declarations, braces,
 * several else arms, the preprocessor lines that open the debug section
 * and the return statement are not visible; the notes below cover only
 * the statements that are.
 *
 * Parameters, as used by the visible statements:
 *   ip     - inode being charged; supplies i_uid, i_mode, i_number,
 *            i_ufsvfs and the mount point name (i_fs->fs_fsmnt).
 *   change - signed block delta. One path negates it into abs_change
 *            and sets dq_curblocks to 0 when the current count is
 *            smaller than abs_change; another path adds it to
 *            dq_curblocks in the 64-bit value ncurblocks.
 *   force  - the branch for an expired soft-limit grace time
 *            (now > dq_btimelimit) is entered only when force is 0.
 *   cr     - credentials; crgetruid(cr) is compared with ip->i_uid
 *            before DQ_BLKS is set.
 *   uerrp, lenp - outputs for a message buffer: *lenp receives the
 *            computed size and *uerrp the kmem_alloc(KM_NOSLEEP) result,
 *            which is checked against NULL before sprintf() writes to
 *            it. The condition that selects between this path and the
 *            cmn_err(CE_NOTE, ...) path is not visible here.
 *
 * Locking, per the ASSERTs: ip->i_contents is held as writer, and
 * vfs_dqrwlock is held unless ip is a shadow inode. dq_lock is entered
 * before the record is modified and exited on both visible paths.
 *
 * The return value is not visible in this listing.
 */
126 chkdq(struct inode
*ip
, long change
, int force
, struct cred
*cr
,
127 char **uerrp
, size_t *lenp
)
131 struct ufsvfs
*ufsvfsp
= ip
->i_ufsvfs
;
/*
 * Message templates. Each begins with a ! character, which the user
 * buffer path skips by formatting from errmsg+1. The declarations that
 * name these strings are not visible in this listing.
 */
135 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
137 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
139 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
141 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
146 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
148 ASSERT((ip
->i_mode
& IFMT
) == IFSHAD
||
149 RW_LOCK_HELD(&ufsvfsp
->vfs_dqrwlock
));
150 ASSERT(RW_WRITE_HELD(&ip
->i_contents
));
/*
 * NOTE(review): the statement that sets dqp is not visible in this
 * listing - confirm where it comes from.
 */
157 * Make sure the quota info record matches the owner.
159 ASSERT(dqp
== NULL
|| ip
->i_uid
== dqp
->dq_uid
);
163 * Shadow inodes and extended attribute directories
164 * should not have quota info records.
166 if ((ip
->i_mode
& IFMT
) == IFSHAD
|| (ip
->i_mode
& IFMT
) == IFATTRDIR
) {
/*
 * Consistency cross-check: recompute the expected record with
 * getinoquota(), fall back to getdiskquota() when that yields NULL for a
 * non-quota inode, and compare the result with dqp in the ASSERTs below.
 * NOTE(review): the preprocessor guard around this section is not
 * visible; the later text mentions DEBUG and CHASE_QUOTA - confirm.
 */
170 * Paranoia for verifying that quotas are okay.
173 struct dquot
*expect_dq
;
176 /* Get current quota information */
177 expect_dq
= getinoquota(ip
);
179 * We got NULL back from getinoquota(), but there is
180 * no error code return from that interface and some
181 * errors are "ok" because we may be testing via error
182 * injection. If this is not the quota inode then we
183 * use getdiskquota() to see if there is an error and
184 * if the error is ok.
186 if (expect_dq
== NULL
&& ip
!= ufsvfsp
->vfs_qinod
) {
190 error
= getdiskquota((uid_t
)ip
->i_uid
, ufsvfsp
, 0,
194 * Either the error was transient or the quota
195 * info record has no limits which gets optimized
196 * out by getinoquota().
199 if (xdqp
->dq_fhardlimit
== 0 &&
200 xdqp
->dq_fsoftlimit
== 0 &&
201 xdqp
->dq_bhardlimit
== 0 &&
202 xdqp
->dq_bsoftlimit
== 0) {
203 mutex_enter(&xdqp
->dq_lock
);
205 mutex_exit(&xdqp
->dq_lock
);
211 case ESRCH
: /* quotas are not enabled */
212 case EINVAL
: /* error flag set on cached record */
213 case EUSERS
: /* quota table is full */
214 case EIO
: /* I/O error */
221 * Make sure dqp and the current quota info agree.
222 * The first part of the #ifndef is the quick way to
223 * do the check and should be part of the standard
224 * DEBUG code. The #else part is useful if you are
225 * actually chasing an inconsistency and don't want
226 * to have to look at stack frames to figure which
227 * variable has what value.
230 ASSERT(mismatch_ok
|| dqp
== expect_dq
);
231 #else /* CHASE_QUOTA */
232 if (expect_dq
== NULL
) {
234 * If you hit this ASSERT() you know that quota
235 * subsystem does not expect quota info for this
236 * inode, but the inode has it.
238 ASSERT(mismatch_ok
|| dqp
== NULL
);
241 * If you hit this ASSERT() you know that quota
242 * subsystem expects quota info for this inode,
243 * but the inode does not have it.
247 * If you hit this ASSERT() you know that quota
248 * subsystem expects quota info for this inode
249 * and the inode has quota info, but the two
250 * quota info pointers are not the same.
252 ASSERT(dqp
== expect_dq
);
254 #endif /* !CHASE_QUOTA */
256 * Release for getinoquota() above or getdiskquota()
257 * call when error is transient.
260 mutex_enter(&expect_dq
->dq_lock
);
262 mutex_exit(&expect_dq
->dq_lock
);
268 * Shadow inodes and extended attribute directories
269 * do not have quota info records.
274 * Quotas are not enabled on this file system so there is nothing
277 if ((ufsvfsp
->vfs_qflags
& MQ_ENABLED
) == 0) {
280 mutex_enter(&dqp
->dq_lock
);
282 dqp
->dq_flags
|= DQ_MOD
;
283 abs_change
= -change
; /* abs_change must be positive */
284 if (dqp
->dq_curblocks
< abs_change
)
285 dqp
->dq_curblocks
= 0;
287 dqp
->dq_curblocks
+= change
;
288 if (dqp
->dq_curblocks
< dqp
->dq_bsoftlimit
)
289 dqp
->dq_btimelimit
= 0;
290 dqp
->dq_flags
&= ~DQ_BLKS
;
292 mutex_exit(&dqp
->dq_lock
);
297 * Adding 'change' to dq_curblocks could cause an overflow.
298 * So store the result in a 64-bit variable and check for
301 ncurblocks
= (uint64_t)dqp
->dq_curblocks
+ change
;
304 * Allocation. Check hard and soft limits.
305 * Skip checks for uid 0 owned files.
306 * This check used to require both euid and ip->i_uid
307 * to be 0; but there are no quotas for uid 0 so
308 * it really doesn't matter who is writing to the
309 * root owned file. And even root cannot write
310 * past a user's quota limit.
316 * Disallow allocation if it would bring the current usage over
317 * the hard limit or if the user is over their soft limit and their
320 if (dqp
->dq_bhardlimit
&& ncurblocks
>= (uint64_t)dqp
->dq_bhardlimit
&&
322 /* If the user was not informed yet and the caller */
323 /* is the owner of the file */
324 if ((dqp
->dq_flags
& DQ_BLKS
) == 0 &&
325 ip
->i_uid
== crgetruid(cr
)) {
327 dqp
->dq_flags
|= DQ_BLKS
;
332 if (dqp
->dq_bsoftlimit
&& ncurblocks
>= (uint64_t)dqp
->dq_bsoftlimit
) {
333 now
= gethrestime_sec();
334 if (dqp
->dq_curblocks
< dqp
->dq_bsoftlimit
||
335 dqp
->dq_btimelimit
== 0) {
336 dqp
->dq_flags
|= DQ_MOD
;
337 dqp
->dq_btimelimit
= now
+
338 ((struct ufsvfs
*)ITOV(ip
)->v_vfsp
->vfs_data
)
340 if (ip
->i_uid
== crgetruid(cr
)) {
343 } else if (now
> dqp
->dq_btimelimit
&& !force
) {
344 /* If the user was not informed yet and the */
345 /* caller is the owner of the file */
346 if ((dqp
->dq_flags
& DQ_BLKS
) == 0 &&
347 ip
->i_uid
== crgetruid(cr
)) {
349 dqp
->dq_flags
|= DQ_BLKS
;
356 dqp
->dq_flags
|= DQ_MOD
;
358 * ncurblocks can be bigger than the maximum
359 * number that can be represented in 32-bits.
360 * When copying ncurblocks to dq_curblocks
361 * (an unsigned 32-bit quantity), make sure there
362 * is no overflow. The only way this can happen
363 * is if "force" is set. Otherwise, this allocation
364 * would have exceeded the hard limit check above
365 * (since the hard limit is a 32-bit quantity).
367 if (ncurblocks
> 0xffffffffLL
) {
368 dqp
->dq_curblocks
= 0xffffffff;
371 dqp
->dq_curblocks
= ncurblocks
;
375 if (dqp
->dq_flags
& DQ_MOD
)
378 mutex_exit(&dqp
->dq_lock
);
380 * Check for any error messages to be sent
382 if (errmsg
!= NULL
) {
384 * Send message to the error log.
388 * Set up message caller should send to user;
389 * gets copied to the message buffer as a side-
390 * effect of the caller's uprintf().
392 *lenp
= strlen(errmsg
) + 20 + 20 +
393 strlen(ip
->i_fs
->fs_fsmnt
) + 1;
394 *uerrp
= (char *)kmem_alloc(*lenp
, KM_NOSLEEP
);
395 if (*uerrp
!= NULL
) {
396 /* errmsg+1 => skip leading ! */
397 (void) sprintf(*uerrp
, errmsg
+1,
398 (int)ttoproc(curthread
)->p_pid
,
399 (int)ip
->i_uid
, (int)ip
->i_number
,
404 * Caller doesn't care, so just copy to the
407 cmn_err(CE_NOTE
, errmsg
,
408 (int)ttoproc(curthread
)->p_pid
,
409 (int)ip
->i_uid
, (int)ip
->i_number
,
417 * Check the inode limit, applying corrective action.
/*
 * chkiq() - apply a one-file change to the inode (file count) usage of
 * a quota record, checking the file limits on allocation.
 *
 * NOTE(review): this listing is incomplete. Some local declarations,
 * braces, several branch bodies and the return statement are not
 * visible; the notes below cover only the statements that are.
 *
 * Parameters, as used by the visible statements:
 *   ufsvfsp - file system instance; supplies vfs_dqrwlock, vfs_qflags,
 *             vfs_ftimelimit and the mount point name
 *             (vfs_fs->fs_fsmnt).
 *   change  - must be 1 or -1 (ASSERT).
 *   ip      - must be NULL when change is 1 (ASSERT). When change is -1
 *             and ip is non-NULL the path that frees a specific inode
 *             is taken.
 *   uid     - owner whose record is fetched with getdiskquota() on the
 *             path without a specific inode; also compared with
 *             crgetruid(cr) before DQ_FILES is set.
 *   force   - both the hard-limit branch and the branch for an expired
 *             soft-limit grace time are entered only when force is 0.
 *   cr      - credentials of the caller.
 *   uerrp, lenp - outputs for a message buffer: *lenp receives the
 *             computed size and *uerrp the kmem_alloc(KM_NOSLEEP)
 *             result, which is checked against NULL before sprintf()
 *             writes to it. The condition that selects between this
 *             path and the cmn_err(CE_NOTE, ...) path is not visible.
 *
 * Locking, per the ASSERT: vfs_dqrwlock is held as reader. dq_lock is
 * entered before the record is examined or modified and exited on each
 * visible path.
 *
 * The return value is not visible in this listing.
 */
420 chkiq(struct ufsvfs
*ufsvfsp
, int change
, struct inode
*ip
, uid_t uid
,
421 int force
, struct cred
*cr
, char **uerrp
, size_t *lenp
)
423 struct dquot
*dqp
, *xdqp
;
424 unsigned int ncurfiles
;
/*
 * Message templates. Each begins with a ! character, which the user
 * buffer path skips by formatting from errmsg+1. The declarations that
 * name these strings are not visible in this listing.
 */
427 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
429 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
431 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
435 ASSERT(RW_READ_HELD(&ufsvfsp
->vfs_dqrwlock
));
437 * Change must be either a single increment or decrement.
438 * If change is an increment, then ip must be NULL.
440 ASSERT(change
== 1 || change
== -1);
441 ASSERT(change
!= 1 || ip
== NULL
);
444 * Quotas are not enabled so bail out now.
446 if ((ufsvfsp
->vfs_qflags
& MQ_ENABLED
) == 0) {
451 * Free a specific inode.
453 if (change
== -1 && ip
) {
/*
 * NOTE(review): the statement that sets dqp on this path is not visible
 * in this listing - confirm where it comes from.
 */
456 * Shadow inodes and extended attribute directories
457 * do not have quota info records.
461 mutex_enter(&dqp
->dq_lock
);
462 if (dqp
->dq_curfiles
) {
464 dqp
->dq_flags
|= DQ_MOD
;
/* Dropping below the soft limit clears the grace-time deadline. */
466 if (dqp
->dq_curfiles
< dqp
->dq_fsoftlimit
) {
467 dqp
->dq_ftimelimit
= 0;
468 dqp
->dq_flags
|= DQ_MOD
;
470 dqp
->dq_flags
&= ~DQ_FILES
;
471 if (dqp
->dq_flags
& DQ_MOD
)
473 mutex_exit(&dqp
->dq_lock
);
478 * Allocation or deallocation without a specific inode.
479 * Get dquot for for uid, fs.
481 if (getdiskquota(uid
, ufsvfsp
, 0, &xdqp
)) {
485 mutex_enter(&dqp
->dq_lock
);
/* A record with neither file limit set needs no file-count checks. */
486 if (dqp
->dq_fsoftlimit
== 0 && dqp
->dq_fhardlimit
== 0) {
488 mutex_exit(&dqp
->dq_lock
);
493 * Skip checks for uid 0 owned files.
494 * This check used to require both euid and uid
495 * to be 0; but there are no quotas for uid 0 so
496 * it really doesn't matter who is writing to the
497 * root owned file. And even root can not write
498 * past the user's quota limit.
504 * Theoretically, this could overflow, but in practice, it
505 * won't. Multi-terabyte file systems are required to have an
506 * nbpi value of at least 1MB. In order to overflow this
507 * field, there would have to be 2^32 inodes in the file.
508 * That would imply a file system of 2^32 * 1MB, which is
509 * 2^(32 + 20), which is 4096 terabytes, which is not
510 * contemplated for ufs any time soon.
512 ncurfiles
= dqp
->dq_curfiles
+ change
;
515 * Dissallow allocation if it would bring the current usage over
516 * the hard limit or if the user is over their soft limit and their
519 if (change
== 1 && ncurfiles
>= dqp
->dq_fhardlimit
&&
520 dqp
->dq_fhardlimit
&& !force
) {
521 /* If the user was not informed yet and the caller */
522 /* is the owner of the file */
523 if ((dqp
->dq_flags
& DQ_FILES
) == 0 && uid
== crgetruid(cr
)) {
525 dqp
->dq_flags
|= DQ_FILES
;
528 } else if (change
== 1 && ncurfiles
>= dqp
->dq_fsoftlimit
&&
529 dqp
->dq_fsoftlimit
) {
530 now
= gethrestime_sec();
531 if (ncurfiles
== dqp
->dq_fsoftlimit
||
532 dqp
->dq_ftimelimit
== 0) {
533 dqp
->dq_flags
|= DQ_MOD
;
534 dqp
->dq_ftimelimit
= now
+ ufsvfsp
->vfs_ftimelimit
;
535 /* If the caller owns the file */
536 if (uid
== crgetruid(cr
))
538 } else if (now
> dqp
->dq_ftimelimit
&& !force
) {
539 /* If the user was not informed yet and the */
540 /* caller is the owner of the file */
541 if ((dqp
->dq_flags
& DQ_FILES
) == 0 &&
542 uid
== crgetruid(cr
)) {
544 dqp
->dq_flags
|= DQ_FILES
;
551 dqp
->dq_flags
|= DQ_MOD
;
552 dqp
->dq_curfiles
+= change
;
554 if (dqp
->dq_flags
& DQ_MOD
)
557 mutex_exit(&dqp
->dq_lock
);
559 * Check for any error messages to be sent
561 if (errmsg
!= NULL
) {
563 * Send message to the error log.
567 * Set up message caller should send to user;
568 * gets copied to the message buffer as a side-
569 * effect of the caller's uprintf().
571 *lenp
= strlen(errmsg
) + 20 + 20 +
572 strlen(ufsvfsp
->vfs_fs
->fs_fsmnt
) + 1;
573 *uerrp
= (char *)kmem_alloc(*lenp
, KM_NOSLEEP
);
574 if (*uerrp
!= NULL
) {
575 /* errmsg+1 => skip leading ! */
576 (void) sprintf(*uerrp
, errmsg
+1,
577 (int)ttoproc(curthread
)->p_pid
,
578 (int)uid
, ufsvfsp
->vfs_fs
->fs_fsmnt
);
582 * Caller doesn't care, so just copy to the
585 cmn_err(CE_NOTE
, errmsg
,
586 (int)ttoproc(curthread
)->p_pid
,
587 (int)uid
, ufsvfsp
->vfs_fs
->fs_fsmnt
);
/*
 * dqrele() - release a reference to quota record dqp.
 *
 * NOTE(review): this listing is incomplete. The return type, the braces,
 * any guard on dqp and the statements executed between mutex_enter() and
 * mutex_exit() are not visible. What is visible: dq_lock is taken, the
 * record is tested for being the last reference (dq_cnt == 1) with
 * DQ_MOD set, and dq_lock is dropped. Presumably a modified record is
 * written back and the reference dropped in between - TODO confirm
 * against the complete source.
 */
597 dqrele(struct dquot
*dqp
)
600 * Shadow inodes and extended attribute directories
601 * do not have quota info records.
604 mutex_enter(&dqp
->dq_lock
);
/* Last holder of a record that carries unsaved modifications. */
605 if (dqp
->dq_cnt
== 1 && dqp
->dq_flags
& DQ_MOD
)
608 mutex_exit(&dqp
->dq_lock
);