Merge commit 'dfc115332c94a2f62058ac7f2bce7631fbd20b3d'
[unleashed/tickless.git] / kernel / fs / bootfs / bootfs_vnops.c
blobd3812e1e59d8eafed2e10d6f5632880bf890ee29
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
17 * bootfs vnode operations
20 #include <sys/types.h>
21 #include <sys/uio.h>
22 #include <sys/sunddi.h>
23 #include <sys/errno.h>
24 #include <sys/vfs.h>
25 #include <sys/vnode.h>
26 #include <sys/pathname.h>
27 #include <sys/mman.h>
28 #include <sys/fs_subr.h>
29 #include <sys/policy.h>
30 #include <sys/sysmacros.h>
31 #include <sys/dirent.h>
32 #include <sys/uio.h>
33 #include <vm/pvn.h>
34 #include <vm/hat.h>
35 #include <vm/seg_map.h>
36 #include <vm/seg_vn.h>
37 #include <sys/vmsystm.h>
39 #include <sys/fs/bootfs_impl.h>
41 /*ARGSUSED*/
42 static int
43 bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
45 return (0);
48 /*ARGSUSED*/
49 static int
50 bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
51 caller_context_t *ct)
53 return (0);
56 /*ARGSUSED*/
57 static int
58 bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
59 caller_context_t *ct)
61 int err;
62 ssize_t sres = uiop->uio_resid;
63 bootfs_node_t *bnp = vp->v_data;
65 if (vp->v_type == VDIR)
66 return (EISDIR);
68 if (vp->v_type != VREG)
69 return (EINVAL);
71 if (uiop->uio_loffset < 0)
72 return (EINVAL);
74 if (uiop->uio_loffset >= bnp->bvn_size)
75 return (0);
77 err = 0;
78 while (uiop->uio_resid != 0) {
79 caddr_t base;
80 long offset, frem;
81 ulong_t poff, segoff;
82 size_t bytes;
83 int relerr;
85 offset = uiop->uio_loffset;
86 poff = offset & PAGEOFFSET;
87 bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
89 frem = bnp->bvn_size - offset;
90 if (frem <= 0) {
91 err = 0;
92 break;
95 /* Don't read past EOF */
96 bytes = MIN(bytes, frem);
99 * Segmaps are likely larger than our page size, so make sure we
100 * have the proper offfset into the resulting segmap data.
102 segoff = (offset & PAGEMASK) & MAXBOFFSET;
104 base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
105 1, S_READ);
107 err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
108 relerr = segmap_release(segkmap, base, 0);
110 if (err == 0)
111 err = relerr;
113 if (err != 0)
114 break;
117 /* Even if we had an error in a partial read, return success */
118 if (uiop->uio_resid > sres)
119 err = 0;
121 gethrestime(&bnp->bvn_attr.va_atime);
123 return (err);
126 /*ARGSUSED*/
127 static int
128 bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
129 cred_t *cr, int *rvalp, caller_context_t *ct)
131 return (ENOTTY);
134 /*ARGSUSED*/
135 static int
136 bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
137 caller_context_t *ct)
139 uint32_t mask;
140 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
142 mask = vap->va_mask;
143 bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t));
144 vap->va_mask = mask;
145 return (0);
148 /*ARGSUSED*/
149 static int
150 bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
151 caller_context_t *ct)
153 int shift = 0;
154 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
156 if (crgetuid(cr) != bpn->bvn_attr.va_uid) {
157 shift += 3;
158 if (groupmember(bpn->bvn_attr.va_gid, cr) == 0)
159 shift += 3;
162 return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid,
163 bpn->bvn_attr.va_mode << shift, mode));
166 /*ARGSUSED*/
167 static int
168 bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
169 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
170 int *direntflags, pathname_t *realpnp)
172 avl_index_t where;
173 bootfs_node_t sn, *bnp;
174 bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data;
176 if (flags & LOOKUP_XATTR)
177 return (EINVAL);
179 if (bpp->bvn_attr.va_type != VDIR)
180 return (ENOTDIR);
182 if (*nm == '\0' || strcmp(nm, ".") == 0) {
183 VN_HOLD(dvp);
184 *vpp = dvp;
185 return (0);
188 if (strcmp(nm, "..") == 0) {
189 VN_HOLD(bpp->bvn_parent->bvn_vnp);
190 *vpp = bpp->bvn_parent->bvn_vnp;
191 return (0);
194 sn.bvn_name = nm;
195 bnp = avl_find(&bpp->bvn_dir, &sn, &where);
196 if (bnp == NULL)
197 return (ENOENT);
199 VN_HOLD(bnp->bvn_vnp);
200 *vpp = bnp->bvn_vnp;
201 return (0);
204 /*ARGSUSED*/
205 static int
206 bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
207 caller_context_t *ct, int flags)
209 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
210 dirent64_t *dp;
211 void *buf;
212 ulong_t bsize, brem;
213 offset_t coff, roff;
214 int dlen, ret;
215 bootfs_node_t *dnp;
216 boolean_t first = B_TRUE;
218 if (uiop->uio_loffset >= MAXOFF_T) {
219 if (eofp != NULL)
220 *eofp = 1;
221 return (0);
224 if (uiop->uio_iovcnt != 1)
225 return (EINVAL);
227 if (!(uiop->uio_iov->iov_len > 0))
228 return (EINVAL);
230 if (vp->v_type != VDIR)
231 return (ENOTDIR);
233 roff = uiop->uio_loffset;
234 coff = 0;
235 brem = bsize = uiop->uio_iov->iov_len;
236 buf = kmem_alloc(bsize, KM_SLEEP);
237 dp = buf;
240 * Recall that offsets here are done based on the name of the dirent
241 * excluding the null terminator. Therefore `.` is always at 0, `..` is
242 * always at 1, and then the first real dirent is at 3. This offset is
243 * what's actually stored when we update the offset in the structure.
245 if (roff == 0) {
246 dlen = DIRENT64_RECLEN(1);
247 if (first == B_TRUE) {
248 if (dlen > brem) {
249 kmem_free(buf, bsize);
250 return (EINVAL);
252 first = B_FALSE;
254 dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid;
255 dp->d_off = 0;
256 dp->d_reclen = (ushort_t)dlen;
257 (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen));
258 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
259 brem -= dlen;
262 if (roff <= 1) {
263 dlen = DIRENT64_RECLEN(2);
264 if (first == B_TRUE) {
265 if (dlen > brem) {
266 kmem_free(buf, bsize);
267 return (EINVAL);
269 first = B_FALSE;
271 dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid;
272 dp->d_off = 1;
273 dp->d_reclen = (ushort_t)dlen;
274 (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen));
275 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
276 brem -= dlen;
279 coff = 3;
280 for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
281 dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
282 size_t nlen = strlen(dnp->bvn_name);
284 if (roff > coff) {
285 coff += nlen;
286 continue;
289 dlen = DIRENT64_RECLEN(nlen);
290 if (dlen > brem) {
291 if (first == B_TRUE) {
292 kmem_free(buf, bsize);
293 return (EINVAL);
295 break;
297 first = B_FALSE;
299 dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid;
300 dp->d_off = coff;
301 dp->d_reclen = (ushort_t)dlen;
302 (void) strncpy(dp->d_name, dnp->bvn_name,
303 DIRENT64_NAMELEN(dlen));
304 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
305 brem -= dlen;
306 coff += nlen;
309 ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
311 if (ret == 0) {
312 if (dnp == NULL) {
313 coff++;
314 if (eofp != NULL)
315 *eofp = 1;
316 } else if (eofp != NULL) {
317 *eofp = 0;
319 uiop->uio_loffset = coff;
321 gethrestime(&bnp->bvn_attr.va_atime);
322 kmem_free(buf, bsize);
323 return (ret);
326 /*ARGSUSED*/
327 static void
328 bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
332 /*ARGSUSED*/
333 static int
334 bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
336 if (write_lock != 0)
337 return (EINVAL);
338 return (0);
341 /*ARGSUSED*/
342 static void
343 bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
347 /*ARGSUSED*/
348 static int
349 bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
350 caller_context_t *ct)
352 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
353 if (vp->v_type == VDIR)
354 return (0);
355 return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0));
359 * We need to fill in a single page of a vnode's memory based on the actual data
360 * from the kernel. We'll use this node's sliding window into physical memory
361 * and update one page at a time.
363 /*ARGSUSED*/
364 static int
365 bootfs_getapage(vnode_t *vp, uoff_t off, size_t len, uint_t *protp,
366 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
367 cred_t *cr)
369 bootfs_node_t *bnp = vp->v_data;
370 page_t *pp, *fpp;
371 pfn_t pfn;
373 for (;;) {
374 /* Easy case where the page exists */
375 pp = page_lookup(&vp->v_object, off,
376 rw == S_CREATE ? SE_EXCL : SE_SHARED);
377 if (pp != NULL) {
378 if (pl != NULL) {
379 pl[0] = pp;
380 pl[1] = NULL;
381 } else {
382 page_unlock(pp);
384 return (0);
387 pp = page_create_va(&vp->v_object, off, PAGESIZE,
388 PG_EXCL | PG_WAIT, seg, addr);
391 * If we didn't get the page, that means someone else beat us to
392 * creating this so we need to try again.
394 if (pp != NULL)
395 break;
398 pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
399 fpp = page_numtopp_nolock(pfn);
401 if (ppcopy(fpp, pp) == 0) {
402 pvn_read_done(pp, B_ERROR);
403 return (EIO);
406 if (pl != NULL) {
407 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
408 } else {
409 pvn_io_done(pp);
412 return (0);
415 /*ARGSUSED*/
416 static int
417 bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
418 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
419 cred_t *cr, caller_context_t *ct)
421 int err;
422 bootfs_node_t *bnp = vp->v_data;
424 if (off + len > bnp->bvn_size + PAGEOFFSET)
425 return (EFAULT);
427 if (protp != NULL)
428 *protp = PROT_ALL;
430 if (len <= PAGESIZE)
431 err = bootfs_getapage(vp, (uoff_t)off, len, protp, pl,
432 plsz, seg, addr, rw, cr);
433 else
434 err = pvn_getpages(bootfs_getapage, vp, (uoff_t)off, len,
435 protp, pl, plsz, seg, addr, rw, cr);
437 return (err);
440 /*ARGSUSED*/
441 static int
442 bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
443 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
444 caller_context_t *ct)
446 int ret;
447 segvn_crargs_t vn_a;
449 #ifdef _ILP32
450 if (len > MAXOFF_T)
451 return (ENOMEM);
452 #endif
454 if (vp->v_flag & VNOMAP)
455 return (ENOSYS);
457 if (off < 0 || off > MAXOFFSET_T - off)
458 return (ENXIO);
460 if (vp->v_type != VREG)
461 return (ENODEV);
463 if ((prot & PROT_WRITE) && (flags & MAP_SHARED))
464 return (ENOTSUP);
466 as_rangelock(as);
467 ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
468 if (ret != 0) {
469 as_rangeunlock(as);
470 return (ret);
473 vn_a.vp = vp;
474 vn_a.offset = (uoff_t)off;
475 vn_a.type = flags & MAP_TYPE;
476 vn_a.prot = prot;
477 vn_a.maxprot = maxprot;
478 vn_a.cred = cr;
479 vn_a.amp = NULL;
480 vn_a.flags = flags & ~MAP_TYPE;
481 vn_a.szc = 0;
482 vn_a.lgrp_mem_policy_flags = 0;
484 ret = as_map(as, *addrp, len, segvn_create, &vn_a);
486 as_rangeunlock(as);
487 return (ret);
491 /*ARGSUSED*/
492 static int
493 bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
494 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
495 caller_context_t *ct)
497 return (0);
500 /*ARGSUSED*/
501 static int
502 bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
503 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
504 caller_context_t *ct)
506 return (0);
509 static int
510 bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
511 caller_context_t *ct)
513 int ret;
515 switch (cmd) {
516 case _PC_TIMESTAMP_RESOLUTION:
517 *valp = 1L;
518 ret = 0;
519 break;
520 default:
521 ret = fs_pathconf(vp, cmd, valp, cr, ct);
524 return (ret);
527 const struct vnodeops bootfs_vnodeops = {
528 .vnop_name = "bootfs",
529 .vop_open = bootfs_open,
530 .vop_close = bootfs_close,
531 .vop_read = bootfs_read,
532 .vop_ioctl = bootfs_ioctl,
533 .vop_getattr = bootfs_getattr,
534 .vop_access = bootfs_access,
535 .vop_lookup = bootfs_lookup,
536 .vop_readdir = bootfs_readdir,
537 .vop_inactive = bootfs_inactive,
538 .vop_rwlock = bootfs_rwlock,
539 .vop_rwunlock = bootfs_rwunlock,
540 .vop_seek = bootfs_seek,
541 .vop_getpage = bootfs_getpage,
542 .vop_map = bootfs_map,
543 .vop_addmap = bootfs_addmap,
544 .vop_delmap = bootfs_delmap,
545 .vop_pathconf = bootfs_pathconf,