4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/t_lock.h>
29 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
37 #include <sys/vnode.h>
40 #include <sys/errno.h>
43 #include <vm/seg_map.h>
47 #include <sys/fs/tmp.h>
48 #include <sys/fs/tmpnode.h>
49 #include <sys/debug.h>
50 #include <sys/cmn_err.h>
52 #include <sys/vtrace.h>
55 * Reserve swap space for the size of the file.
56 * Called before growing a file (i.e. ftruncate, write)
57 * Returns 0 on success.
63 size_t delta
, /* size needed */
64 int pagecreate
) /* call anon_resv if set */
66 pgcnt_t pages
= btopr(delta
);
69 ASSERT(RW_WRITE_HELD(&tp
->tn_rwlock
));
70 ASSERT(tp
->tn_type
== VREG
);
72 * pagecreate is set only if we actually need to call anon_resv
73 * to reserve an additional page of anonymous memory.
74 * Since anon_resv always reserves a page at a time,
75 * it should only get called when we know we're growing the
76 * file into a new page or filling a hole.
78 * Deny if trying to reserve more than tmpfs can allocate
80 zone
= tm
->tm_vfsp
->vfs_zone
;
81 if (pagecreate
&& ((tm
->tm_anonmem
+ pages
> tm
->tm_anonmax
) ||
82 (!anon_checkspace(ptob(pages
+ tmpfs_minfree
), zone
)) ||
83 (anon_try_resv_zone(delta
, zone
) == 0))) {
91 mutex_enter(&tm
->tm_contents
);
92 tm
->tm_anonmem
+= pages
;
93 mutex_exit(&tm
->tm_contents
);
95 TRACE_2(TR_FAC_VM
, TR_ANON_TMPFS
, "anon tmpfs:%p %lu",
103 * tmp_unresv - called when truncating a file
104 * Only called if we're freeing at least pagesize bytes
105 * because anon_unresv does a btopr(delta)
113 ASSERT(RW_WRITE_HELD(&tp
->tn_rwlock
));
114 ASSERT(tp
->tn_type
== VREG
);
116 anon_unresv_zone(delta
, tm
->tm_vfsp
->vfs_zone
);
118 mutex_enter(&tm
->tm_contents
);
119 tm
->tm_anonmem
-= btopr(delta
);
120 mutex_exit(&tm
->tm_contents
);
122 TRACE_2(TR_FAC_VM
, TR_ANON_TMPFS
, "anon tmpfs:%p %lu", tp
, delta
);
125 #define TMP_INIT_SZ 128
128 * Grow the anon pointer array to cover 'newsize' bytes plus slack.
131 tmpnode_growmap(struct tmpnode
*tp
, ulong_t newsize
)
133 pgcnt_t np
= btopr(newsize
);
135 ASSERT(RW_WRITE_HELD(&tp
->tn_rwlock
));
136 ASSERT(RW_WRITE_HELD(&tp
->tn_contents
));
137 ASSERT(tp
->tn_type
== VREG
);
139 if (tp
->tn_asize
>= np
)
142 if (newsize
> MAXOFF_T
)
143 np
= btopr((u_offset_t
)MAXOFF_T
);
145 if (tp
->tn_anon
== NULL
) {
146 tp
->tn_anon
= anon_create(MAX(np
, TMP_INIT_SZ
), ANON_SLEEP
);
147 tp
->tn_asize
= tp
->tn_anon
->size
;
151 tp
->tn_asize
= anon_grow(tp
->tn_anon
, NULL
, tp
->tn_asize
,
152 np
- tp
->tn_asize
, ANON_SLEEP
);
153 ASSERT(tp
->tn_asize
>= np
);
157 * Initialize a tmpnode and add it to file list under mount point.
160 tmpnode_init(struct tmount
*tm
, struct tmpnode
*t
, vattr_t
*vap
, cred_t
*cred
)
167 rw_init(&t
->tn_rwlock
, NULL
, RW_DEFAULT
, NULL
);
168 mutex_init(&t
->tn_tlock
, NULL
, MUTEX_DEFAULT
, NULL
);
169 t
->tn_mode
= MAKEIMODE(vap
->va_type
, vap
->va_mode
);
171 t
->tn_type
= vap
->va_type
;
172 t
->tn_nodeid
= (ino64_t
)(uint32_t)((uintptr_t)t
>> 3);
177 t
->tn_uid
= vap
->va_uid
;
178 t
->tn_gid
= vap
->va_gid
;
180 t
->tn_uid
= crgetuid(cred
);
181 t
->tn_gid
= crgetgid(cred
);
184 t
->tn_fsid
= tm
->tm_dev
;
185 t
->tn_rdev
= vap
->va_rdev
;
186 t
->tn_blksize
= PAGESIZE
;
195 t
->tn_vnode
= vn_alloc(KM_SLEEP
);
197 vn_setops(vp
, tmp_vnodeops
);
198 vp
->v_vfsp
= tm
->tm_vfsp
;
199 vp
->v_type
= vap
->va_type
;
200 vp
->v_rdev
= vap
->va_rdev
;
201 vp
->v_data
= (caddr_t
)t
;
202 mutex_enter(&tm
->tm_contents
);
204 * Increment the pseudo generation number for this tmpnode.
205 * Since tmpnodes are allocated and freed, there really is no
206 * particular generation number for a new tmpnode. Just fake it
207 * by using a counter in each file system.
209 t
->tn_gen
= tm
->tm_gen
++;
212 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs.
213 * Root directory is handled specially in tmp_mount.
215 if (tm
->tm_rootnode
!= (struct tmpnode
*)NULL
) {
217 t
->tn_back
= tm
->tm_rootnode
->tn_back
;
218 t
->tn_back
->tn_forw
= tm
->tm_rootnode
->tn_back
= t
;
220 mutex_exit(&tm
->tm_contents
);
225 * tmpnode_trunc - set length of tmpnode and deal with resources
233 size_t oldsize
= tp
->tn_size
;
235 struct vnode
*vp
= TNTOV(tp
);
239 ASSERT(RW_WRITE_HELD(&tp
->tn_rwlock
));
240 ASSERT(RW_WRITE_HELD(&tp
->tn_contents
));
242 if (newsize
== oldsize
) {
243 /* Required by POSIX */
247 switch (tp
->tn_type
) {
249 /* Growing the file */
250 if (newsize
> oldsize
) {
251 delta
= P2ROUNDUP(newsize
, PAGESIZE
) -
252 P2ROUNDUP(oldsize
, PAGESIZE
);
254 * Grow the size of the anon array to the new size.
255 * Reserve the space for the growth here.
256 * We do it this way for now because this is how
257 * tmpfs used to do it, and this way the reserved
258 * space is always equal to the file size.
259 * Alternatively, we could wait to reserve space 'til
260 * someone tries to store into one of the newly
261 * trunc'ed up pages. This would give us behavior
262 * identical to ufs; i.e., you could fail a
263 * fault on storing into a holey region of a file
264 * if there is no space in the filesystem to fill
265 * the hole at that time.
268 * tmp_resv calls anon_resv only if we're extending
269 * the file into a new page
271 if (tmp_resv(tm
, tp
, delta
,
272 (btopr(newsize
) != btopr(oldsize
)))) {
276 tmpnode_growmap(tp
, newsize
);
277 tp
->tn_size
= newsize
;
281 /* Free anon pages if shrinking file over page boundary. */
282 if (btopr(newsize
) != btopr(oldsize
)) {
284 delta
= P2ROUNDUP(oldsize
, PAGESIZE
) -
285 P2ROUNDUP(newsize
, PAGESIZE
);
286 freed
= anon_pages(tp
->tn_anon
, btopr(newsize
),
288 tp
->tn_nblocks
-= freed
;
289 anon_free(tp
->tn_anon
, btopr(newsize
), delta
);
290 tmp_unresv(tm
, tp
, delta
);
294 * Update the file size now to reflect the pages we just
295 * blew away as we're about to drop the
296 * contents lock to zero the partial page (which could
297 * re-enter tmpfs via getpage and try to reacquire the lock).
298 * Once we drop the lock, faulters can fill in holes in
299 * the file and if we haven't updated the size they
300 * may fill in holes that are beyond EOF, which will then
303 tp
->tn_size
= newsize
;
305 /* Zero new size of file to page boundary. */
306 if (anon_get_ptr(tp
->tn_anon
, btop(newsize
)) != NULL
) {
309 zlen
= PAGESIZE
- ((ulong_t
)newsize
& PAGEOFFSET
);
310 rw_exit(&tp
->tn_contents
);
311 pvn_vpzero(TNTOV(tp
), (u_offset_t
)newsize
, zlen
);
312 rw_enter(&tp
->tn_contents
, RW_WRITER
);
316 /* Delete anon array for tmpnode */
317 ASSERT(tp
->tn_nblocks
== 0);
318 ASSERT(anon_get_ptr(tp
->tn_anon
, 0) == NULL
);
319 ASSERT(!vn_has_cached_data(vp
));
321 anon_release(tp
->tn_anon
, tp
->tn_asize
);
328 * Don't do anything here
329 * tmp_inactive frees the memory
336 * Remove all the directory entries under this directory.
343 ASSERT(tp
->tn_nlink
== 0);
355 * tmpnode_trunc() cannot fail when newsize == 0.
357 ASSERT(error
== 0 || newsize
!= 0);