Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / mem.c
blob676be8a148892a4f478c22e3004a90d1443483a0
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
28 * Copyright 2017 Joyent, Inc.
29 * Copyright 2017 James S Blachly, MD <james.blachly@gmail.com>
33 * Memory special file
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/user.h>
39 #include <sys/buf.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/vm.h>
43 #include <sys/uio.h>
44 #include <sys/mman.h>
45 #include <sys/kmem.h>
46 #include <vm/seg.h>
47 #include <vm/page.h>
48 #include <sys/stat.h>
49 #include <sys/vmem.h>
50 #include <sys/memlist.h>
51 #include <sys/bootconf.h>
53 #include <vm/seg_vn.h>
54 #include <vm/seg_dev.h>
55 #include <vm/seg_kmem.h>
56 #include <vm/seg_kp.h>
57 #include <vm/seg_kpm.h>
58 #include <vm/hat.h>
60 #include <sys/conf.h>
61 #include <sys/mem.h>
62 #include <sys/types.h>
63 #include <sys/conf.h>
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/errno.h>
67 #include <sys/modctl.h>
68 #include <sys/memlist.h>
69 #include <sys/ddi.h>
70 #include <sys/sunddi.h>
71 #include <sys/debug.h>
72 #include <sys/fm/protocol.h>
74 #if defined(__sparc)
75 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
76 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
77 uint64_t *, int *, int *, int *);
78 extern size_t cpu_get_name_bufsize(void);
79 extern int cpu_get_mem_sid(char *, char *, int, int *);
80 extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
81 #elif defined(__x86)
82 #include <sys/cpu_module.h>
83 #endif /* __sparc */
86 * Turn a byte length into a pagecount. The DDI btop takes a
87 * 32-bit size on 32-bit machines, this handles 64-bit sizes for
88 * large physical-memory 32-bit machines.
90 #define BTOP(x) ((pgcnt_t)((x) >> _pageshift))
92 static kmutex_t mm_lock;
93 static caddr_t mm_map;
95 static dev_info_t *mm_dip; /* private copy of devinfo pointer */
97 static int mm_kmem_io_access;
99 static int mm_kstat_update(kstat_t *ksp, int rw);
100 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
102 static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
104 #define MM_KMEMLOG_NENTRIES 64
106 static int mm_kmemlogent;
107 static mm_logentry_t mm_kmemlog[MM_KMEMLOG_NENTRIES];
110 * On kmem/allmem writes, we log information that might be useful in the event
111 * that a write is errant (that is, due to operator error) and induces a later
112 * problem. Note that (in particular) in the event of such operator-induced
113 * corruption, a search over the kernel address space for the corrupted
114 * address will yield the ring buffer entry that recorded the write. And
115 * should it seem baroque or otherwise unnecessary, yes, we need this kind of
116 * auditing facility and yes, we learned that the hard way: disturbingly,
117 * there exist recommendations for "tuning" the system that involve writing to
118 * kernel memory addresses via the kernel debugger, and -- as we discovered --
119 * these can easily be applied incorrectly or unsafely, yielding an entirely
120 * undebuggable "can't happen" kind of panic.
122 static void
123 mm_logkmem(struct uio *uio)
125 mm_logentry_t *ent;
126 proc_t *p = curthread->t_procp;
128 mutex_enter(&mm_lock);
130 ent = &mm_kmemlog[mm_kmemlogent++];
132 if (mm_kmemlogent == MM_KMEMLOG_NENTRIES)
133 mm_kmemlogent = 0;
135 ent->mle_vaddr = (uintptr_t)uio->uio_loffset;
136 ent->mle_len = uio->uio_resid;
137 gethrestime(&ent->mle_hrestime);
138 ent->mle_hrtime = gethrtime();
139 ent->mle_pid = p->p_pidp->pid_id;
141 (void) strncpy(ent->mle_psargs,
142 p->p_user.u_psargs, sizeof (ent->mle_psargs));
144 mutex_exit(&mm_lock);
147 /*ARGSUSED1*/
148 static int
149 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
151 int i;
152 struct mem_minor {
153 char *name;
154 minor_t minor;
155 int privonly;
156 const char *rdpriv;
157 const char *wrpriv;
158 mode_t priv_mode;
159 } mm[] = {
160 { "mem", M_MEM, 0, NULL, "all", 0640 },
161 { "kmem", M_KMEM, 0, NULL, "all", 0640 },
162 { "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
163 { "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
164 { "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
165 { "full", M_FULL, PRIVONLY_DEV, NULL, NULL, 0666 },
167 kstat_t *ksp;
169 mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
170 mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
172 for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
173 if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
174 mm[i].minor, DDI_PSEUDO, mm[i].privonly,
175 mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
176 DDI_FAILURE) {
177 ddi_remove_minor_node(devi, NULL);
178 return (DDI_FAILURE);
182 mm_dip = devi;
184 ksp = kstat_create("mm", 0, "phys_installed", "misc",
185 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
186 if (ksp != NULL) {
187 ksp->ks_update = mm_kstat_update;
188 ksp->ks_snapshot = mm_kstat_snapshot;
189 ksp->ks_lock = &mm_lock; /* XXX - not really needed */
190 kstat_install(ksp);
193 mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
194 "kmem_io_access", 0);
196 return (DDI_SUCCESS);
199 /*ARGSUSED*/
200 static int
201 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
203 register int error;
205 switch (infocmd) {
206 case DDI_INFO_DEVT2DEVINFO:
207 *result = (void *)mm_dip;
208 error = DDI_SUCCESS;
209 break;
210 case DDI_INFO_DEVT2INSTANCE:
211 *result = NULL;
212 error = DDI_SUCCESS;
213 break;
214 default:
215 error = DDI_FAILURE;
217 return (error);
220 /*ARGSUSED1*/
221 static int
222 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
224 switch (getminor(*devp)) {
225 case M_NULL:
226 case M_ZERO:
227 case M_FULL:
228 case M_MEM:
229 case M_KMEM:
230 case M_ALLKMEM:
231 /* standard devices */
232 break;
234 default:
235 /* Unsupported or unknown type */
236 return (EINVAL);
238 /* must be character device */
239 if (typ != OTYP_CHR)
240 return (EINVAL);
241 return (0);
244 struct pollhead mm_pollhd;
246 /*ARGSUSED*/
247 static int
248 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
249 struct pollhead **phpp)
251 switch (getminor(dev)) {
252 case M_NULL:
253 case M_ZERO:
254 case M_FULL:
255 case M_MEM:
256 case M_KMEM:
257 case M_ALLKMEM:
258 *reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
259 POLLWRNORM | POLLRDBAND | POLLWRBAND);
261 * A non NULL pollhead pointer should be returned in case
262 * user polls for 0 events or is doing an edge-triggerd poll.
264 if ((!*reventsp && !anyyet) || (events & POLLET))
265 *phpp = &mm_pollhd;
266 return (0);
267 default:
268 /* no other devices currently support polling */
269 return (ENXIO);
273 static int
274 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
275 char *name, caddr_t valuep, int *lengthp)
278 * implement zero size to reduce overhead (avoid two failing
279 * property lookups per stat).
281 return (ddi_prop_op_size(dev, dip, prop_op,
282 flags, name, valuep, lengthp, 0));
285 static int
286 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
287 page_t *pp)
289 int error = 0;
290 int devload = 0;
291 int is_memory = pf_is_memory(pfn);
292 size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
293 (size_t)uio->uio_iov->iov_len);
294 caddr_t va = NULL;
296 mutex_enter(&mm_lock);
298 if (is_memory && kpm_enable) {
299 if (pp)
300 va = hat_kpm_mapin(pp, NULL);
301 else
302 va = hat_kpm_mapin_pfn(pfn);
305 if (va == NULL) {
306 hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
307 (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
308 HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
309 va = mm_map;
310 devload = 1;
313 if (!is_memory) {
314 if (allowio) {
315 size_t c = uio->uio_iov->iov_len;
317 if (ddi_peekpokeio(NULL, uio, rw,
318 (caddr_t)(uintptr_t)uio->uio_loffset, c,
319 sizeof (int32_t)) != DDI_SUCCESS)
320 error = EFAULT;
321 } else
322 error = EIO;
323 } else
324 error = uiomove(va + pageoff, nbytes, rw, uio);
326 if (devload)
327 hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
328 else if (pp)
329 hat_kpm_mapout(pp, NULL, va);
330 else
331 hat_kpm_mapout_pfn(pfn);
333 mutex_exit(&mm_lock);
334 return (error);
337 static int
338 mmpagelock(struct as *as, caddr_t va)
340 struct seg *seg;
341 int i;
343 AS_LOCK_ENTER(as, RW_READER);
344 seg = as_segat(as, va);
345 i = (seg != NULL)? segop_capable(seg, S_CAPABILITY_NOMINFLT) : 0;
346 AS_LOCK_EXIT(as);
348 return (i);
352 #define NEED_LOCK_KVADDR(va) 0
355 /*ARGSUSED3*/
356 static int
357 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
359 pfn_t v;
360 struct iovec *iov;
361 int error = 0;
362 size_t c;
363 ssize_t oresid = uio->uio_resid;
364 minor_t minor = getminor(dev);
366 while (uio->uio_resid > 0 && error == 0) {
367 iov = uio->uio_iov;
368 if (iov->iov_len == 0) {
369 uio->uio_iov++;
370 uio->uio_iovcnt--;
371 if (uio->uio_iovcnt < 0)
372 panic("mmrw");
373 continue;
375 switch (minor) {
377 case M_MEM:
378 memlist_read_lock();
379 if (!address_in_memlist(phys_install,
380 (uint64_t)uio->uio_loffset, 1)) {
381 memlist_read_unlock();
382 error = EFAULT;
383 break;
385 memlist_read_unlock();
387 v = BTOP((uoff_t)uio->uio_loffset);
388 error = mmio(uio, rw, v,
389 uio->uio_loffset & PAGEOFFSET, 0, NULL);
390 break;
392 case M_KMEM:
393 case M_ALLKMEM:
395 page_t **ppp = NULL;
396 caddr_t vaddr = (caddr_t)uio->uio_offset;
397 int try_lock = NEED_LOCK_KVADDR(vaddr);
398 int locked = 0;
400 if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
401 break;
403 if (rw == UIO_WRITE)
404 mm_logkmem(uio);
407 * If vaddr does not map a valid page, as_pagelock()
408 * will return failure. Hence we can't check the
409 * return value and return EFAULT here as we'd like.
410 * seg_kp and seg_kpm do not properly support
411 * as_pagelock() for this context so we avoid it
412 * using the try_lock set check above. Some day when
413 * the kernel page locking gets redesigned all this
414 * muck can be cleaned up.
416 if (try_lock)
417 locked = (as_pagelock(&kas, &ppp, vaddr,
418 PAGESIZE, S_WRITE) == 0);
420 v = hat_getpfnum(kas.a_hat,
421 (caddr_t)(uintptr_t)uio->uio_loffset);
422 if (v == PFN_INVALID) {
423 if (locked)
424 as_pageunlock(&kas, ppp, vaddr,
425 PAGESIZE, S_WRITE);
426 error = EFAULT;
427 break;
430 error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
431 minor == M_ALLKMEM || mm_kmem_io_access,
432 (locked && ppp) ? *ppp : NULL);
433 if (locked)
434 as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
435 S_WRITE);
438 break;
440 case M_FULL:
441 if (rw == UIO_WRITE) {
442 error = ENOSPC;
443 break;
445 /* else it's a read, fall through to zero case */
446 /*FALLTHROUGH*/
448 case M_ZERO:
449 if (rw == UIO_READ) {
450 label_t ljb;
452 if (on_fault(&ljb)) {
453 no_fault();
454 error = EFAULT;
455 break;
457 uzero(iov->iov_base, iov->iov_len);
458 no_fault();
459 uio->uio_resid -= iov->iov_len;
460 uio->uio_loffset += iov->iov_len;
461 break;
463 /* else it's a write, fall through to NULL case */
464 /*FALLTHROUGH*/
466 case M_NULL:
467 if (rw == UIO_READ)
468 return (0);
469 c = iov->iov_len;
470 iov->iov_base += c;
471 iov->iov_len -= c;
472 uio->uio_loffset += c;
473 uio->uio_resid -= c;
474 break;
478 return (uio->uio_resid == oresid ? error : 0);
481 static int
482 mmread(dev_t dev, struct uio *uio, cred_t *cred)
484 return (mmrw(dev, uio, UIO_READ, cred));
487 static int
488 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
490 return (mmrw(dev, uio, UIO_WRITE, cred));
494 * Private ioctl for libkvm to support kvm_physaddr().
495 * Given an address space and a VA, compute the PA.
497 static int
498 mmioctl_vtop(intptr_t data)
500 #ifdef _SYSCALL32
501 mem_vtop32_t vtop32;
502 #endif
503 mem_vtop_t mem_vtop;
504 proc_t *p;
505 pfn_t pfn = (pfn_t)PFN_INVALID;
506 pid_t pid = 0;
507 struct as *as;
508 struct seg *seg;
510 if (get_udatamodel() == DATAMODEL_NATIVE) {
511 if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
512 return (EFAULT);
514 #ifdef _SYSCALL32
515 else {
516 if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
517 return (EFAULT);
518 mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
519 mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;
521 if (mem_vtop.m_as != NULL)
522 return (EINVAL);
524 #endif
526 if (mem_vtop.m_as == &kas) {
527 pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
528 } else {
529 if (mem_vtop.m_as == NULL) {
531 * Assume the calling process's address space if the
532 * caller didn't specify one.
534 p = curthread->t_procp;
535 if (p == NULL)
536 return (EIO);
537 mem_vtop.m_as = p->p_as;
540 mutex_enter(&pidlock);
541 for (p = practive; p != NULL; p = p->p_next) {
542 if (p->p_as == mem_vtop.m_as) {
543 pid = p->p_pid;
544 break;
547 mutex_exit(&pidlock);
548 if (p == NULL)
549 return (EIO);
550 p = sprlock(pid);
551 if (p == NULL)
552 return (EIO);
553 as = p->p_as;
554 if (as == mem_vtop.m_as) {
555 mutex_exit(&p->p_lock);
556 AS_LOCK_ENTER(as, RW_READER);
557 for (seg = AS_SEGFIRST(as); seg != NULL;
558 seg = AS_SEGNEXT(as, seg))
559 if ((uintptr_t)mem_vtop.m_va -
560 (uintptr_t)seg->s_base < seg->s_size)
561 break;
562 if (seg != NULL)
563 pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
564 AS_LOCK_EXIT(as);
565 mutex_enter(&p->p_lock);
567 sprunlock(p);
569 mem_vtop.m_pfn = pfn;
570 if (pfn == PFN_INVALID)
571 return (EIO);
573 if (get_udatamodel() == DATAMODEL_NATIVE) {
574 if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
575 return (EFAULT);
577 #ifdef _SYSCALL32
578 else {
579 vtop32.m_pfn = mem_vtop.m_pfn;
580 if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
581 return (EFAULT);
583 #endif
585 return (0);
589 * Given a PA, execute the given page retire command on it.
591 static int
592 mmioctl_page_retire(int cmd, intptr_t data)
594 extern int page_retire_test(void);
595 uint64_t pa;
597 if (copyin((void *)data, &pa, sizeof (uint64_t))) {
598 return (EFAULT);
601 switch (cmd) {
602 case MEM_PAGE_ISRETIRED:
603 return (page_retire_check(pa, NULL));
605 case MEM_PAGE_UNRETIRE:
606 return (page_unretire(pa));
608 case MEM_PAGE_RETIRE:
609 return (page_retire(pa, PR_FMA));
611 case MEM_PAGE_RETIRE_MCE:
612 return (page_retire(pa, PR_MCE));
614 case MEM_PAGE_RETIRE_UE:
615 return (page_retire(pa, PR_UE));
617 case MEM_PAGE_GETERRORS:
619 uint64_t page_errors;
620 int rc = page_retire_check(pa, &page_errors);
621 if (copyout(&page_errors, (void *)data,
622 sizeof (uint64_t))) {
623 return (EFAULT);
625 return (rc);
628 case MEM_PAGE_RETIRE_TEST:
629 return (page_retire_test());
633 return (EINVAL);
638 * Private ioctls for
639 * libkvm to support kvm_physaddr().
640 * FMA support for page_retire() and memory attribute information.
642 /*ARGSUSED*/
643 static int
644 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
646 if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
647 (cmd != MEM_VTOP && getminor(dev) != M_MEM))
648 return (ENXIO);
650 switch (cmd) {
651 case MEM_VTOP:
652 return (mmioctl_vtop(data));
654 case MEM_PAGE_RETIRE:
655 case MEM_PAGE_ISRETIRED:
656 case MEM_PAGE_UNRETIRE:
657 case MEM_PAGE_RETIRE_MCE:
658 case MEM_PAGE_RETIRE_UE:
659 case MEM_PAGE_GETERRORS:
660 case MEM_PAGE_RETIRE_TEST:
661 return (mmioctl_page_retire(cmd, data));
663 case MEM_NAME:
664 case MEM_INFO:
665 case MEM_SID:
666 return (ENOTSUP);
668 return (ENXIO);
671 /*ARGSUSED2*/
672 static int
673 mmmmap(dev_t dev, off_t off, int prot)
675 pfn_t pf;
676 struct memlist *pmem;
677 minor_t minor = getminor(dev);
679 switch (minor) {
680 case M_MEM:
681 pf = btop(off);
682 memlist_read_lock();
683 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
684 if (pf >= BTOP(pmem->ml_address) &&
685 pf < BTOP(pmem->ml_address + pmem->ml_size)) {
686 memlist_read_unlock();
687 return (impl_obmem_pfnum(pf));
690 memlist_read_unlock();
691 break;
693 case M_KMEM:
694 case M_ALLKMEM:
695 /* no longer supported with KPR */
696 return (-1);
698 case M_FULL:
699 case M_ZERO:
701 * We shouldn't be mmap'ing to /dev/zero here as
702 * mmsegmap() should have already converted
703 * a mapping request for this device to a mapping
704 * using seg_vn for anonymous memory.
706 break;
709 return (-1);
713 * This function is called when a memory device is mmap'ed.
714 * Set up the mapping to the correct device driver.
716 static int
717 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
718 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
720 struct segvn_crargs vn_a;
721 struct segdev_crargs dev_a;
722 int error;
723 minor_t minor;
724 off_t i;
726 minor = getminor(dev);
728 as_rangelock(as);
730 * No need to worry about vac alignment on /dev/zero
731 * since this is a "clone" object that doesn't yet exist.
733 error = choose_addr(as, addrp, len, off,
734 (minor == M_MEM) || (minor == M_KMEM), flags);
735 if (error != 0) {
736 as_rangeunlock(as);
737 return (error);
740 switch (minor) {
741 case M_MEM:
742 /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
743 if ((flags & MAP_TYPE) != MAP_SHARED) {
744 as_rangeunlock(as);
745 return (EINVAL);
749 * Check to ensure that the entire range is
750 * legal and we are not trying to map in
751 * more than the device will let us.
753 for (i = 0; i < len; i += PAGESIZE) {
754 if (mmmmap(dev, off + i, maxprot) == -1) {
755 as_rangeunlock(as);
756 return (ENXIO);
761 * Use seg_dev segment driver for /dev/mem mapping.
763 dev_a.mapfunc = mmmmap;
764 dev_a.dev = dev;
765 dev_a.offset = off;
766 dev_a.type = (flags & MAP_TYPE);
767 dev_a.prot = (uchar_t)prot;
768 dev_a.maxprot = (uchar_t)maxprot;
769 dev_a.hat_attr = 0;
772 * Make /dev/mem mappings non-consistent since we can't
773 * alias pages that don't have page structs behind them,
774 * such as kernel stack pages. If someone mmap()s a kernel
775 * stack page and if we give them a tte with cv, a line from
776 * that page can get into both pages of the spitfire d$.
777 * But snoop from another processor will only invalidate
778 * the first page. This later caused kernel (xc_attention)
779 * to go into an infinite loop at pil 13 and no interrupts
780 * could come in. See 1203630.
783 dev_a.hat_flags = HAT_LOAD_NOCONSIST;
784 dev_a.devmap_data = NULL;
786 error = as_map(as, *addrp, len, segdev_create, &dev_a);
787 break;
789 case M_ZERO:
791 * Use seg_vn segment driver for /dev/zero mapping.
792 * Passing in a NULL amp gives us the "cloning" effect.
794 vn_a.vp = NULL;
795 vn_a.offset = 0;
796 vn_a.type = (flags & MAP_TYPE);
797 vn_a.prot = prot;
798 vn_a.maxprot = maxprot;
799 vn_a.flags = flags & ~MAP_TYPE;
800 vn_a.cred = cred;
801 vn_a.amp = NULL;
802 vn_a.szc = 0;
803 vn_a.lgrp_mem_policy_flags = 0;
804 error = as_map(as, *addrp, len, segvn_create, &vn_a);
805 break;
807 case M_KMEM:
808 case M_ALLKMEM:
809 /* No longer supported with KPR. */
810 error = ENXIO;
811 break;
813 case M_NULL:
815 * Use seg_dev segment driver for /dev/null mapping.
817 dev_a.mapfunc = mmmmap;
818 dev_a.dev = dev;
819 dev_a.offset = off;
820 dev_a.type = 0; /* neither PRIVATE nor SHARED */
821 dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
822 dev_a.hat_attr = 0;
823 dev_a.hat_flags = 0;
824 error = as_map(as, *addrp, len, segdev_create, &dev_a);
825 break;
827 default:
828 error = ENXIO;
831 as_rangeunlock(as);
832 return (error);
835 static struct cb_ops mm_cb_ops = {
836 mmopen, /* open */
837 nulldev, /* close */
838 nodev, /* strategy */
839 nodev, /* print */
840 nodev, /* dump */
841 mmread, /* read */
842 mmwrite, /* write */
843 mmioctl, /* ioctl */
844 nodev, /* devmap */
845 mmmmap, /* mmap */
846 mmsegmap, /* segmap */
847 mmchpoll, /* poll */
848 mmpropop, /* prop_op */
849 0, /* streamtab */
850 D_NEW | D_MP | D_64BIT | D_U64BIT
853 static struct dev_ops mm_ops = {
854 DEVO_REV, /* devo_rev, */
855 0, /* refcnt */
856 mm_info, /* get_dev_info */
857 nulldev, /* identify */
858 nulldev, /* probe */
859 mm_attach, /* attach */
860 nodev, /* detach */
861 nodev, /* reset */
862 &mm_cb_ops, /* driver operations */
863 NULL, /* bus operations */
864 NULL, /* power */
865 ddi_quiesce_not_needed, /* quiesce */
868 static struct modldrv modldrv = {
869 &mod_driverops, "memory driver", &mm_ops,
872 static struct modlinkage modlinkage = {
873 MODREV_1, &modldrv, NULL
877 _init(void)
879 return (mod_install(&modlinkage));
883 _info(struct modinfo *modinfop)
885 return (mod_info(&modlinkage, modinfop));
889 _fini(void)
891 return (mod_remove(&modlinkage));
894 static int
895 mm_kstat_update(kstat_t *ksp, int rw)
897 struct memlist *pmem;
898 uint_t count;
900 if (rw == KSTAT_WRITE)
901 return (EACCES);
903 count = 0;
904 memlist_read_lock();
905 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
906 count++;
908 memlist_read_unlock();
910 ksp->ks_ndata = count;
911 ksp->ks_data_size = count * 2 * sizeof (uint64_t);
913 return (0);
916 static int
917 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
919 struct memlist *pmem;
920 struct memunit {
921 uint64_t address;
922 uint64_t size;
923 } *kspmem;
925 if (rw == KSTAT_WRITE)
926 return (EACCES);
928 ksp->ks_snaptime = gethrtime();
930 kspmem = (struct memunit *)buf;
931 memlist_read_lock();
932 for (pmem = phys_install; pmem != NULL;
933 pmem = pmem->ml_next, kspmem++) {
934 if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
935 break;
936 kspmem->address = pmem->ml_address;
937 kspmem->size = pmem->ml_size;
939 memlist_read_unlock();
941 return (0);
945 * Read a mem_name_t from user-space and store it in the mem_name_t
946 * pointed to by the mem_name argument.
948 static int
949 mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
951 if (get_udatamodel() == DATAMODEL_NATIVE) {
952 if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
953 return (EFAULT);
955 #ifdef _SYSCALL32
956 else {
957 mem_name32_t mem_name32;
959 if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
960 return (EFAULT);
961 mem_name->m_addr = mem_name32.m_addr;
962 mem_name->m_synd = mem_name32.m_synd;
963 mem_name->m_type[0] = mem_name32.m_type[0];
964 mem_name->m_type[1] = mem_name32.m_type[1];
965 mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
966 mem_name->m_namelen = (size_t)mem_name32.m_namelen;
967 mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
968 mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
970 #endif /* _SYSCALL32 */
972 return (0);