dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / drivers / ramdisk / ramdisk.c
blob34bb8aa34781e78d8d90be0108318029a37842ac
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
29 * Ramdisk device driver.
31 * There are three types of ramdisk:
33 * (1) 'virtual' OBP-created ramdisks created with "address" and "size"
34 * properties describing the virtual address range used by the ramdisk
35 * (used on SPARC during boot),
36 * (2) 'physical' OBP-created ramdisks created with an "existing" property
37 * describing a scatter-gather list of physical address ranges used by the
38 * ramdisk (used on x86 for ramdisk-based root),
39 * (3) 'pseudo' ramdisks created at runtime backed by an explicit list of
40 * pages.
42 * Unlike virtual and physical ramdisks, pseudo ramdisks have no
43 * corresponding OBP device node. The ramdisk(7D) driver is capable of
44 * dealing with all three, including with the creation and deletion of
45 * 'pseudo' ramdisks.
47 * Every ramdisk has a single 'state' structure which maintains data for
48 * that ramdisk, and is assigned a single minor number. The bottom 10-bits
49 * of the minor number index the state structures; the top 8-bits give an
50 * OBP-disk number, i.e. they are zero for 'pseudo' ramdisks.
52 * Each OBP-created ramdisk (i.e., virtual or physical) has its own node in
53 * the device tree with an "address" and "size" (for virtual ramdisks) or
54 * "existing" (for physical ramdisks) properties which describe the memory
55 * assigned to the ramdisk. All 'pseudo' ramdisks share a common devinfo
56 * structure.
58 * A single character device node is used by ramdiskadm(1M) to communicate
59 * with the ramdisk driver, with minor number 0:
61 * /dev/ramdiskctl -> /devices/pseudo/ramdisk@0:ctl
63 * For consistent access, block and raw device nodes are created for *every*
64 * ramdisk. For 'pseudo' ramdisks:
66 * /dev/ramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>
67 * /dev/rramdisk/<diskname> -> /devices/pseudo/ramdisk@0:<diskname>,raw
69 * For OBP-created ramdisks:
71 * /dev/ramdisk/<diskname> -> /devices/ramdisk-<diskname>:a
72 * /dev/rramdisk/<diskname> -> /devices/ramdisk-<diskname>:a,raw
74 * This allows the transition from the standalone to the kernel to proceed
75 * when booting from a ramdisk, and for the installation to correctly identify
76 * the root device.
79 #include <sys/types.h>
80 #include <sys/param.h>
81 #include <sys/sysmacros.h>
82 #include <sys/errno.h>
83 #include <sys/uio.h>
84 #include <sys/buf.h>
85 #include <sys/modctl.h>
86 #include <sys/open.h>
87 #include <sys/kmem.h>
88 #include <sys/poll.h>
89 #include <sys/conf.h>
90 #include <sys/cmn_err.h>
91 #include <sys/stat.h>
92 #include <sys/file.h>
93 #include <sys/ddi.h>
94 #include <sys/sunddi.h>
95 #include <sys/ramdisk.h>
96 #include <vm/seg_kmem.h>
97 #include <vm/seg_kpm.h>
99 struct rd_map {
100 caddr_t vaddr; /* window base address */
101 size_t size; /* size of the virtual window */
102 offset_t base; /* device offset */
103 pfn_t pfn; /* physical "address" */
104 boolean_t mapped; /* is the rest of this struct valid? */
105 boolean_t kpm; /* mapped using kpm */
108 struct rd_ops {
109 int (*alloc)(rd_devstate_t *);
110 void (*dealloc)(rd_devstate_t *);
111 void (*map)(rd_devstate_t *, off_t, struct rd_map *);
112 void (*unmap)(rd_devstate_t *, struct rd_map *);
116 * Flag to disable the use of real ramdisks (in the OBP - on Sparc) when
117 * the associated memory is no longer available - set in the bootops section.
121 * An opaque handle where information about our set of ramdisk devices lives.
123 static void *rd_statep;
126 * Pointer to devinfo for the 'pseudo' ramdisks. Real OBP-created ramdisks
127 * get their own individual devinfo.
129 static dev_info_t *rd_dip = NULL;
132 * Global state lock.
134 static kmutex_t rd_lock;
137 * Maximum number of ramdisks supported by this driver.
139 static uint32_t rd_max_disks = RD_DFLT_DISKS;
142 * Percentage of physical memory which can be assigned to pseudo ramdisks,
143 * what that equates to in pages, and how many pages are currently assigned.
145 static uint_t rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM;
146 static pgcnt_t rd_max_physmem;
147 static pgcnt_t rd_tot_physmem;
149 static uint_t rd_maxphys = RD_DEFAULT_MAXPHYS;
152 * Is the driver busy, i.e. are there any pseudo ramdisk devices in existence?
154 static int
155 rd_is_busy(void)
157 minor_t minor;
158 rd_devstate_t *rsp;
160 ASSERT(mutex_owned(&rd_lock));
161 for (minor = 1; minor <= rd_max_disks; ++minor) {
162 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL &&
163 rsp->rd_dip == rd_dip) {
164 return (EBUSY);
167 return (0);
171 * Find the first free minor number; returns zero if there isn't one.
173 static minor_t
174 rd_find_free_minor(void)
176 minor_t minor;
178 ASSERT(mutex_owned(&rd_lock));
179 for (minor = 1; minor <= rd_max_disks; ++minor) {
180 if (ddi_get_soft_state(rd_statep, minor) == NULL) {
181 return (minor);
184 return (0);
188 * Locate the rd_devstate for the named ramdisk; returns NULL if not found.
189 * Each ramdisk is identified uniquely by name, i.e. an OBP-created ramdisk
190 * cannot have the same name as a pseudo ramdisk.
192 static rd_devstate_t *
193 rd_find_named_disk(char *name)
195 minor_t minor;
196 rd_devstate_t *rsp;
198 ASSERT(mutex_owned(&rd_lock));
199 for (minor = 1; minor <= rd_max_disks; ++minor) {
200 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL &&
201 strcmp(rsp->rd_name, name) == 0) {
202 return (rsp);
205 return (NULL);
209 * Locate the rd_devstate for the OBP-created ramdisk whose devinfo is
210 * referenced by 'dip'; returns NULL if not found (shouldn't happen).
212 static rd_devstate_t *
213 rd_find_dip_state(dev_info_t *dip)
215 minor_t minor;
216 rd_devstate_t *rsp;
218 ASSERT(mutex_owned(&rd_lock));
219 for (minor = 1; minor <= rd_max_disks; ++minor) {
220 if ((rsp = ddi_get_soft_state(rd_statep, minor)) != NULL &&
221 rsp->rd_dip == dip) {
222 return (rsp);
225 return (NULL);
229 * Is the ramdisk open?
231 static int
232 rd_is_open(rd_devstate_t *rsp)
234 ASSERT(mutex_owned(&rd_lock));
235 return (rsp->rd_chr_open || rsp->rd_blk_open || rsp->rd_lyr_open_cnt);
239 * Mark the ramdisk open.
241 static int
242 rd_opened(rd_devstate_t *rsp, int otyp)
244 ASSERT(mutex_owned(&rd_lock));
245 switch (otyp) {
246 case OTYP_CHR:
247 rsp->rd_chr_open = 1;
248 break;
249 case OTYP_BLK:
250 rsp->rd_blk_open = 1;
251 break;
252 case OTYP_LYR:
253 rsp->rd_lyr_open_cnt++;
254 break;
255 default:
256 return (-1);
258 return (0);
262 * Mark the ramdisk closed.
264 static void
265 rd_closed(rd_devstate_t *rsp, int otyp)
267 ASSERT(mutex_owned(&rd_lock));
268 switch (otyp) {
269 case OTYP_CHR:
270 rsp->rd_chr_open = 0;
271 break;
272 case OTYP_BLK:
273 rsp->rd_blk_open = 0;
274 break;
275 case OTYP_LYR:
276 rsp->rd_lyr_open_cnt--;
277 break;
278 default:
279 break;
283 static void
284 rd_init_tuneables(void)
286 char *prop, *p;
289 * Ensure sanity of 'rd_max_disks', which may be tuned in ramdisk.conf.
291 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0,
292 "max_disks", &prop) == DDI_PROP_SUCCESS) {
293 p = prop;
294 rd_max_disks = (uint32_t)stoi(&p);
295 ddi_prop_free(prop);
297 if (rd_max_disks >= RD_MAX_DISKS) {
298 cmn_err(CE_WARN, "ramdisk: rd_max_disks (%u) too big;"
299 " using default (%u).", rd_max_disks, RD_MAX_DISKS - 1);
301 rd_max_disks = RD_MAX_DISKS - 1;
305 * Ensure sanity of 'rd_percent_physmem', which may be tuned
306 * in ramdisk.conf.
308 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, rd_dip, 0,
309 "percent_physmem", &prop) == DDI_PROP_SUCCESS) {
310 p = prop;
311 rd_percent_physmem = (uint_t)stoi(&p);
312 ddi_prop_free(prop);
314 if (rd_percent_physmem >= 100) {
315 cmn_err(CE_WARN, "ramdisk: rd_percent_physmem (%u) >= 100;"
316 " using default (%u%%).", rd_percent_physmem,
317 RD_DEFAULT_PERCENT_PHYSMEM);
319 rd_percent_physmem = RD_DEFAULT_PERCENT_PHYSMEM;
323 * Since availrmem_initial is a long, this won't overflow.
325 rd_max_physmem = (availrmem_initial * rd_percent_physmem) / 100;
329 * Allocate enough physical pages to hold "npages" pages. Returns an
330 * array of page_t * pointers that can later be mapped in or out via
331 * rd_{un}map_window() but is otherwise opaque, or NULL on failure.
333 static page_t **
334 rd_phys_alloc(pgcnt_t npages)
336 page_t *pp, **ppa;
337 spgcnt_t i;
338 size_t ppalen;
339 struct seg kseg;
340 caddr_t addr; /* For coloring */
342 if (rd_tot_physmem + npages > rd_max_physmem)
343 return (NULL);
345 if (!page_resv(npages, KM_NOSLEEP))
346 return (NULL);
348 if (!page_create_wait(npages, 0)) {
349 page_unresv(npages);
350 return (NULL);
353 ppalen = npages * sizeof (struct page_t *);
354 ppa = kmem_zalloc(ppalen, KM_NOSLEEP);
355 if (ppa == NULL) {
356 page_create_putback(npages);
357 page_unresv(npages);
358 return (NULL);
361 kseg.s_as = &kas;
362 for (i = 0, addr = NULL; i < npages; ++i, addr += PAGESIZE) {
363 pp = page_get_freelist(&kvp.v_object, 0, &kseg, addr, PAGESIZE,
364 0, NULL);
365 if (pp == NULL) {
366 pp = page_get_cachelist(&kvp.v_object, 0, &kseg, addr,
367 0, NULL);
368 if (pp == NULL)
369 goto out;
370 if (!PP_ISAGED(pp))
371 page_hashout(pp, false);
374 PP_CLRFREE(pp);
375 PP_CLRAGED(pp);
376 ppa[i] = pp;
379 for (i = 0; i < npages; i++)
380 page_downgrade(ppa[i]);
381 rd_tot_physmem += npages;
383 return (ppa);
385 out:
386 ASSERT(i < npages);
387 page_create_putback(npages - i);
388 while (--i >= 0)
389 page_free(ppa[i], 0);
390 kmem_free(ppa, ppalen);
391 page_unresv(npages);
393 return (NULL);
397 * Free physical pages previously allocated via rd_phys_alloc(); note that
398 * this function may block as it has to wait until it can exclusively lock
399 * all the pages first.
401 static void
402 rd_phys_free(page_t **ppa, pgcnt_t npages)
404 pgcnt_t i;
405 size_t ppalen = npages * sizeof (struct page_t *);
407 for (i = 0; i < npages; ++i) {
408 if (! page_tryupgrade(ppa[i])) {
409 page_unlock(ppa[i]);
410 while (! page_lock(ppa[i], SE_EXCL, NULL, P_RECLAIM))
413 page_free(ppa[i], 0);
416 kmem_free(ppa, ppalen);
418 page_unresv(npages);
419 rd_tot_physmem -= npages;
423 * Remove a window mapping (if present).
425 static void
426 rdop_unmap_physical(rd_devstate_t *rsp, struct rd_map *map)
428 if (map->kpm) {
429 hat_kpm_mapout_pfn(map->pfn);
430 } else {
431 hat_unload(kas.a_hat, map->vaddr, map->size, HAT_UNLOAD_UNLOCK);
432 vmem_free(heap_arena, map->vaddr, map->size);
436 static void
437 rdop_unmap_pseudo(rd_devstate_t *rsp, struct rd_map *map)
439 if (map->kpm) {
440 pgcnt_t offpgs = btop(map->base);
442 hat_kpm_mapout(rsp->rd_ppa[offpgs], NULL, map->vaddr);
443 } else {
444 hat_unload(kas.a_hat, map->vaddr, map->size, HAT_UNLOAD_UNLOCK);
445 vmem_free(heap_arena, map->vaddr, map->size);
449 static void
450 rdop_unmap(rd_devstate_t *rsp, struct rd_map *map)
452 if (!map->mapped)
453 return;
455 if (rsp->rd_ops->unmap)
456 rsp->rd_ops->unmap(rsp, map);
458 map->mapped = B_FALSE;
462 * Map a portion of the ramdisk into the virtual window.
464 static void
465 rdop_map_virtual(rd_devstate_t *rsp, off_t offset, struct rd_map *map)
468 * The whole range is already mapped.
470 map->base = 0;
471 map->size = rsp->rd_size;
472 map->vaddr = rsp->rd_obp_virt;
473 map->mapped = B_TRUE;
476 static void
477 rdop_map_physical(rd_devstate_t *rsp, off_t offset, struct rd_map *map)
479 boolean_t used_kpm = B_TRUE;
480 pgcnt_t offpgs = btop(offset);
481 caddr_t vaddr = NULL;
482 uint_t i;
484 map->base = ptob(offpgs);
485 map->size = PAGESIZE;
488 * Physical ramdisk: locate the physical range which contains this
489 * offset.
491 for (i = 0; i < rsp->rd_nexisting; ++i) {
492 if (offset < rsp->rd_existing[i].size)
493 break;
495 offset -= rsp->rd_existing[i].size;
497 ASSERT3U(i, <, rsp->rd_nexisting);
499 map->pfn = btop(rsp->rd_existing[i].phys + offset);
502 * Load the mapping.
504 if (kpm_enable)
505 vaddr = hat_kpm_mapin_pfn(map->pfn);
507 if (!vaddr) {
508 used_kpm = B_FALSE;
509 vaddr = vmem_alloc(heap_arena, map->size, VM_SLEEP);
511 /* XXX: support more than 1 page map */
512 hat_devload(kas.a_hat, vaddr, map->size, map->pfn,
513 PROT_READ | PROT_WRITE,
514 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
517 map->vaddr = vaddr;
518 map->kpm = used_kpm;
519 map->mapped = B_TRUE;
522 static void
523 rdop_map_pseudo(rd_devstate_t *rsp, off_t offset, struct rd_map *map)
525 boolean_t used_kpm = B_TRUE;
526 pgcnt_t offpgs = btop(offset);
527 caddr_t vaddr = NULL;
529 map->base = ptob(offpgs);
530 map->size = PAGESIZE;
533 * Load the mapping.
535 if (kpm_enable)
536 vaddr = hat_kpm_mapin(rsp->rd_ppa[offpgs], NULL);
538 if (!vaddr) {
539 used_kpm = B_FALSE;
540 vaddr = vmem_alloc(heap_arena, map->size, VM_SLEEP);
542 /* XXX: support more than 1 page map */
543 hat_memload(kas.a_hat, vaddr, rsp->rd_ppa[offpgs],
544 PROT_READ | PROT_WRITE | HAT_NOSYNC, HAT_LOAD_LOCK);
547 map->vaddr = vaddr;
548 map->kpm = used_kpm;
549 map->mapped = B_TRUE;
552 static void
553 rdop_map(rd_devstate_t *rsp, off_t offset, struct rd_map *map)
555 if (map->mapped) {
557 * Already mapped; is offset within our window?
559 if (offset >= map->base && offset < map->base + map->size)
560 return;
563 * No, we need to re-map; toss the old mapping.
565 rdop_unmap(rsp, map);
568 ASSERT3U(map->mapped, ==, B_FALSE);
570 rsp->rd_ops->map(rsp, offset, map);
573 static int
574 rdop_alloc_obp(rd_devstate_t *rsp)
577 * For OBP-created ramdisks the device nodes are:
579 * /devices/ramdisk-<diskname>:a
580 * /devices/ramdisk-<diskname>:a,raw
582 if (ddi_create_minor_node(rsp->rd_dip, "a", S_IFBLK, rsp->rd_minor,
583 DDI_PSEUDO, 0) == DDI_FAILURE)
584 return (1);
586 if (ddi_create_minor_node(rsp->rd_dip, "a,raw", S_IFCHR, rsp->rd_minor,
587 DDI_PSEUDO, 0) == DDI_FAILURE)
588 return (1);
590 return (0);
593 static int
594 rdop_alloc_pseudo(rd_devstate_t *rsp)
596 char namebuf[RD_NAME_LEN + 5];
598 rsp->rd_npages = btopr(rsp->rd_size);
599 rsp->rd_ppa = rd_phys_alloc(rsp->rd_npages);
600 if (rsp->rd_ppa == NULL)
601 return (1);
604 * For non-OBP ramdisks the device nodes are:
606 * /devices/pseudo/ramdisk@0:<diskname>
607 * /devices/pseudo/ramdisk@0:<diskname>,raw
609 (void) snprintf(namebuf, sizeof (namebuf), "%s", rsp->rd_name);
610 if (ddi_create_minor_node(rsp->rd_dip, namebuf, S_IFBLK, rsp->rd_minor,
611 DDI_PSEUDO, 0) == DDI_FAILURE)
612 return (1);
614 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", rsp->rd_name);
615 if (ddi_create_minor_node(rsp->rd_dip, namebuf, S_IFCHR, rsp->rd_minor,
616 DDI_PSEUDO, 0) == DDI_FAILURE)
617 return (1);
619 return (0);
622 static int
623 rdop_alloc(rd_devstate_t *rsp)
625 return (rsp->rd_ops->alloc(rsp));
628 static void
629 rdop_dealloc_obp(rd_devstate_t *rsp)
631 ddi_remove_minor_node(rsp->rd_dip, "a");
632 ddi_remove_minor_node(rsp->rd_dip, "a,raw");
635 static void
636 rdop_dealloc_pseudo(rd_devstate_t *rsp)
638 char namebuf[RD_NAME_LEN + 5];
640 (void) snprintf(namebuf, sizeof (namebuf), "%s", rsp->rd_name);
641 ddi_remove_minor_node(rsp->rd_dip, namebuf);
642 (void) snprintf(namebuf, sizeof (namebuf), "%s,raw", rsp->rd_name);
643 ddi_remove_minor_node(rsp->rd_dip, namebuf);
646 static void
647 rdop_dealloc(rd_devstate_t *rsp)
649 rsp->rd_ops->dealloc(rsp);
652 static const struct rd_ops rd_virtual_ops = {
653 .alloc = rdop_alloc_obp,
654 .dealloc = rdop_dealloc_obp,
655 .map = rdop_map_virtual,
658 static const struct rd_ops rd_physical_ops = {
659 .alloc = rdop_alloc_obp,
660 .dealloc = rdop_dealloc_obp,
661 .map = rdop_map_physical,
662 .unmap = rdop_unmap_physical,
665 static const struct rd_ops rd_pseudo_ops = {
666 .alloc = rdop_alloc_pseudo,
667 .dealloc = rdop_dealloc_pseudo,
668 .map = rdop_map_pseudo,
669 .unmap = rdop_unmap_pseudo,
673 * Fakes up a disk geometry, and one big partition, based on the size
674 * of the file. This is needed because we allow newfs'ing the device,
675 * and newfs will do several disk ioctls to figure out the geometry and
676 * partition information. It uses that information to determine the parameters
677 * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
678 * have to support it.
680 * Stolen from lofi.c - should maybe split out common code sometime.
682 static void
683 rd_fake_disk_geometry(rd_devstate_t *rsp)
685 /* dk_geom - see dkio(7I) */
687 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
688 * of sectors), but that breaks programs like fdisk which want to
689 * partition a disk by cylinder. With one cylinder, you can't create
690 * an fdisk partition and put pcfs on it for testing (hard to pick
691 * a number between one and one).
693 * The cheezy floppy test is an attempt to not have too few cylinders
694 * for a small file, or so many on a big file that you waste space
695 * for backup superblocks or cylinder group structures.
697 if (rsp->rd_size < (2 * 1024 * 1024)) /* floppy? */
698 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (100 * 1024);
699 else
700 rsp->rd_dkg.dkg_ncyl = rsp->rd_size / (300 * 1024);
701 /* in case file file is < 100k */
702 if (rsp->rd_dkg.dkg_ncyl == 0)
703 rsp->rd_dkg.dkg_ncyl = 1;
704 rsp->rd_dkg.dkg_acyl = 0;
705 rsp->rd_dkg.dkg_bcyl = 0;
706 rsp->rd_dkg.dkg_nhead = 1;
707 rsp->rd_dkg.dkg_obs1 = 0;
708 rsp->rd_dkg.dkg_intrlv = 0;
709 rsp->rd_dkg.dkg_obs2 = 0;
710 rsp->rd_dkg.dkg_obs3 = 0;
711 rsp->rd_dkg.dkg_apc = 0;
712 rsp->rd_dkg.dkg_rpm = 7200;
713 rsp->rd_dkg.dkg_pcyl = rsp->rd_dkg.dkg_ncyl + rsp->rd_dkg.dkg_acyl;
714 rsp->rd_dkg.dkg_nsect = rsp->rd_size /
715 (DEV_BSIZE * rsp->rd_dkg.dkg_ncyl);
716 rsp->rd_dkg.dkg_write_reinstruct = 0;
717 rsp->rd_dkg.dkg_read_reinstruct = 0;
719 /* vtoc - see dkio(7I) */
720 bzero(&rsp->rd_vtoc, sizeof (struct vtoc));
721 rsp->rd_vtoc.v_sanity = VTOC_SANE;
722 rsp->rd_vtoc.v_version = V_VERSION;
723 bcopy(RD_DRIVER_NAME, rsp->rd_vtoc.v_volume, 7);
724 rsp->rd_vtoc.v_sectorsz = DEV_BSIZE;
725 rsp->rd_vtoc.v_nparts = 1;
726 rsp->rd_vtoc.v_part[0].p_tag = V_UNASSIGNED;
727 rsp->rd_vtoc.v_part[0].p_flag = V_UNMNT;
728 rsp->rd_vtoc.v_part[0].p_start = (daddr_t)0;
730 * The partition size cannot just be the number of sectors, because
731 * that might not end on a cylinder boundary. And if that's the case,
732 * newfs/mkfs will print a scary warning. So just figure the size
733 * based on the number of cylinders and sectors/cylinder.
735 rsp->rd_vtoc.v_part[0].p_size = rsp->rd_dkg.dkg_pcyl *
736 rsp->rd_dkg.dkg_nsect * rsp->rd_dkg.dkg_nhead;
738 /* dk_cinfo - see dkio(7I) */
739 bzero(&rsp->rd_ci, sizeof (struct dk_cinfo));
740 (void) strcpy(rsp->rd_ci.dki_cname, RD_DRIVER_NAME);
741 rsp->rd_ci.dki_ctype = DKC_MD;
742 rsp->rd_ci.dki_flags = 0;
743 rsp->rd_ci.dki_cnum = 0;
744 rsp->rd_ci.dki_addr = 0;
745 rsp->rd_ci.dki_space = 0;
746 rsp->rd_ci.dki_prio = 0;
747 rsp->rd_ci.dki_vec = 0;
748 (void) strcpy(rsp->rd_ci.dki_dname, RD_DRIVER_NAME);
749 rsp->rd_ci.dki_unit = 0;
750 rsp->rd_ci.dki_slave = 0;
751 rsp->rd_ci.dki_partition = 0;
753 * newfs uses this to set maxcontig. Must not be < 16, or it
754 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
755 * it by the block size. Then tunefs doesn't work because
756 * maxcontig is 0.
758 rsp->rd_ci.dki_maxtransfer = 16;
762 * Deallocate resources (virtual and physical, device nodes, structures)
763 * from a ramdisk.
765 static void
766 rd_dealloc_resources(rd_devstate_t *rsp)
768 dev_info_t *dip = rsp->rd_dip;
769 dev_t fulldev;
771 mutex_destroy(&rsp->rd_device_lock);
773 if (rsp->rd_existing)
774 ddi_prop_free(rsp->rd_existing);
776 if (rsp->rd_ppa != NULL)
777 rd_phys_free(rsp->rd_ppa, rsp->rd_npages);
780 * Remove the block and raw device nodes.
782 rdop_dealloc(rsp);
785 * Remove the "Size" and "Nblocks" properties.
787 fulldev = makedevice(ddi_driver_major(dip), rsp->rd_minor);
788 (void) ddi_prop_remove(fulldev, dip, SIZE_PROP_NAME);
789 (void) ddi_prop_remove(fulldev, dip, NBLOCKS_PROP_NAME);
791 if (rsp->rd_kstat) {
792 kstat_delete(rsp->rd_kstat);
793 mutex_destroy(&rsp->rd_kstat_lock);
796 ddi_soft_state_free(rd_statep, rsp->rd_minor);
800 * Allocate resources (virtual and physical memory, device nodes, structures)
801 * for a ramdisk.
803 static rd_devstate_t *
804 rd_alloc_resources(char *name, const struct rd_ops *ops, uint_t addr,
805 size_t size, dev_info_t *dip)
807 minor_t minor;
808 rd_devstate_t *rsp;
809 dev_t fulldev;
810 int64_t Nblocks_prop_val;
811 int64_t Size_prop_val;
813 minor = rd_find_free_minor();
814 if (ddi_soft_state_zalloc(rd_statep, minor) == DDI_FAILURE) {
815 return (NULL);
817 rsp = ddi_get_soft_state(rd_statep, minor);
819 (void) strcpy(rsp->rd_name, name);
820 rsp->rd_dip = dip;
821 rsp->rd_minor = minor;
822 rsp->rd_size = size;
823 rsp->rd_ops = ops;
824 rsp->rd_obp_virt = (caddr_t)(ulong_t)addr;
826 mutex_init(&rsp->rd_device_lock, NULL, MUTEX_DRIVER, NULL);
829 * Allocate physical memory for non-OBP ramdisks.
830 * Create pseudo block and raw device nodes.
832 if (rdop_alloc(rsp))
833 goto create_failed;
836 * Create the "Size" and "Nblocks" properties.
838 fulldev = makedevice(ddi_driver_major(dip), minor);
839 Size_prop_val = size;
840 if ((ddi_prop_update_int64(fulldev, dip,
841 SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
842 goto create_failed;
844 Nblocks_prop_val = size / DEV_BSIZE;
845 if ((ddi_prop_update_int64(fulldev, dip,
846 NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
847 goto create_failed;
851 * Allocate kstat stuff.
853 rsp->rd_kstat = kstat_create(RD_DRIVER_NAME, minor, NULL,
854 "disk", KSTAT_TYPE_IO, 1, 0);
855 if (rsp->rd_kstat) {
856 mutex_init(&rsp->rd_kstat_lock, NULL,
857 MUTEX_DRIVER, NULL);
858 rsp->rd_kstat->ks_lock = &rsp->rd_kstat_lock;
859 kstat_install(rsp->rd_kstat);
862 rd_fake_disk_geometry(rsp);
864 return (rsp);
866 create_failed:
868 * Cleanup.
870 rd_dealloc_resources(rsp);
872 return (NULL);
876 * Undo what we did in rd_attach, freeing resources and removing things which
877 * we installed. The system framework guarantees we are not active with this
878 * devinfo node in any other entry points at this time.
880 static int
881 rd_common_detach(dev_info_t *dip)
883 if (dip == rd_dip) {
885 * Pseudo node: can't detach if any pseudo ramdisks exist.
887 if (rd_is_busy()) {
888 return (DDI_FAILURE);
890 ddi_soft_state_free(rd_statep, RD_CTL_MINOR);
891 rd_dip = NULL;
892 } else {
894 * A non-pseudo ramdisk; find the state and free resources.
896 rd_devstate_t *rsp;
898 if ((rsp = rd_find_dip_state(dip)) != NULL)
899 rd_dealloc_resources(rsp);
902 ddi_remove_minor_node(dip, NULL);
904 return (DDI_SUCCESS);
907 static int
908 rd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
910 char *name;
911 rd_existing_t *ep = NULL;
912 uint_t obpaddr = 0, nep, i;
913 size_t size = 0;
914 rd_devstate_t *rsp;
916 switch (cmd) {
918 case DDI_ATTACH:
919 mutex_enter(&rd_lock);
922 * For pseudo ramdisk devinfo set up state 0 and :ctl device;
923 * else it's an OBP-created ramdisk.
925 if (is_pseudo_device(dip)) {
926 rd_dip = dip;
927 rd_init_tuneables();
930 * The zeroth minor is reserved for the ramdisk
931 * 'control' device.
933 if (ddi_soft_state_zalloc(rd_statep, RD_CTL_MINOR) ==
934 DDI_FAILURE) {
935 goto attach_failed;
937 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR);
938 rsp->rd_dip = dip;
940 if (ddi_create_minor_node(dip, RD_CTL_NODE,
941 S_IFCHR, 0, DDI_PSEUDO, 0) == DDI_FAILURE) {
942 goto attach_failed;
944 } else {
945 const struct rd_ops *ops;
948 RD_STRIP_PREFIX(name, ddi_node_name(dip));
950 if (strlen(name) > RD_NAME_LEN) {
951 cmn_err(CE_CONT,
952 "%s: name too long - ignoring\n", name);
953 goto attach_failed;
957 * An OBP-created ramdisk must have an 'existing'
958 * property; get and check it.
960 if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, dip,
961 DDI_PROP_DONTPASS, OBP_EXISTING_PROP_NAME,
962 (uchar_t **)&ep, &nep) == DDI_SUCCESS) {
964 if (nep == 0 || (nep % sizeof (*ep)) != 0) {
965 cmn_err(CE_CONT,
966 "%s: " OBP_EXISTING_PROP_NAME
967 " illegal size\n", name);
968 goto attach_failed;
970 nep /= sizeof (*ep);
973 * Calculate the size of the ramdisk.
975 for (i = 0; i < nep; ++i)
976 size += ep[i].size;
978 ops = &rd_physical_ops;
979 } else if ((obpaddr = ddi_prop_get_int(DDI_DEV_T_ANY,
980 dip, DDI_PROP_DONTPASS, OBP_ADDRESS_PROP_NAME,
981 0)) != 0) {
983 size = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
984 DDI_PROP_DONTPASS, OBP_SIZE_PROP_NAME, 0);
986 ops = &rd_virtual_ops;
987 } else {
988 cmn_err(CE_CONT, "%s: missing OBP properties\n",
989 name);
990 goto attach_failed;
994 * Allocate driver resources for the ramdisk.
996 if ((rsp = rd_alloc_resources(name, ops, obpaddr, size,
997 dip)) == NULL)
998 goto attach_failed;
1000 rsp->rd_existing = ep;
1001 rsp->rd_nexisting = nep;
1004 mutex_exit(&rd_lock);
1006 ddi_report_dev(dip);
1008 return (DDI_SUCCESS);
1010 case DDI_RESUME:
1011 return (DDI_SUCCESS);
1013 default:
1014 return (DDI_FAILURE);
1017 attach_failed:
1019 * Use our common detach routine to unallocate any stuff which
1020 * was allocated above.
1022 (void) rd_common_detach(dip);
1023 mutex_exit(&rd_lock);
1025 if (ep != NULL) {
1026 ddi_prop_free(ep);
1028 return (DDI_FAILURE);
1031 static int
1032 rd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1034 int e;
1036 switch (cmd) {
1038 case DDI_DETACH:
1039 mutex_enter(&rd_lock);
1040 e = rd_common_detach(dip);
1041 mutex_exit(&rd_lock);
1043 return (e);
1045 case DDI_SUSPEND:
1046 return (DDI_SUCCESS);
1048 default:
1049 return (DDI_FAILURE);
1053 /*ARGSUSED*/
1054 static int
1055 rd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1057 rd_devstate_t *rsp;
1059 switch (infocmd) {
1060 case DDI_INFO_DEVT2DEVINFO:
1061 if ((rsp = ddi_get_soft_state(rd_statep,
1062 getminor((dev_t)arg))) != NULL) {
1063 *result = rsp->rd_dip;
1064 return (DDI_SUCCESS);
1066 *result = NULL;
1067 return (DDI_FAILURE);
1069 case DDI_INFO_DEVT2INSTANCE:
1070 if ((rsp = ddi_get_soft_state(rd_statep,
1071 getminor((dev_t)arg))) != NULL) {
1072 *result = (void *)(uintptr_t)
1073 ddi_get_instance(rsp->rd_dip);
1074 return (DDI_SUCCESS);
1076 *result = NULL;
1077 return (DDI_FAILURE);
1079 default:
1080 return (DDI_FAILURE);
1084 /*ARGSUSED3*/
1085 static int
1086 rd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1088 minor_t minor;
1089 rd_devstate_t *rsp;
1091 mutex_enter(&rd_lock);
1093 minor = getminor(*devp);
1094 if (minor == RD_CTL_MINOR) {
1096 * Master control device; must be opened exclusively.
1098 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) {
1099 mutex_exit(&rd_lock);
1100 return (EINVAL);
1103 rsp = ddi_get_soft_state(rd_statep, RD_CTL_MINOR);
1104 if (rsp == NULL) {
1105 mutex_exit(&rd_lock);
1106 return (ENXIO);
1109 if (rd_is_open(rsp)) {
1110 mutex_exit(&rd_lock);
1111 return (EBUSY);
1113 (void) rd_opened(rsp, OTYP_CHR);
1115 mutex_exit(&rd_lock);
1117 return (0);
1120 rsp = ddi_get_soft_state(rd_statep, minor);
1121 if (rsp == NULL) {
1122 mutex_exit(&rd_lock);
1123 return (ENXIO);
1126 if (rd_opened(rsp, otyp) == -1) {
1127 mutex_exit(&rd_lock);
1128 return (EINVAL);
1131 mutex_exit(&rd_lock);
1132 return (0);
1135 /*ARGSUSED*/
1136 static int
1137 rd_close(dev_t dev, int flag, int otyp, struct cred *credp)
1139 minor_t minor;
1140 rd_devstate_t *rsp;
1142 mutex_enter(&rd_lock);
1144 minor = getminor(dev);
1146 rsp = ddi_get_soft_state(rd_statep, minor);
1147 if (rsp == NULL) {
1148 mutex_exit(&rd_lock);
1149 return (EINVAL);
1152 rd_closed(rsp, otyp);
1154 mutex_exit(&rd_lock);
1156 return (0);
1159 static void
1160 rd_minphys(struct buf *bp)
1162 if (bp->b_bcount > rd_maxphys) {
1163 bp->b_bcount = rd_maxphys;
1167 static void
1168 rd_rw(rd_devstate_t *rsp, struct buf *bp, offset_t offset, size_t nbytes)
1170 const int reading = bp->b_flags & B_READ;
1171 struct rd_map map;
1172 caddr_t buf_addr;
1174 map.mapped = B_FALSE;
1176 bp_mapin(bp);
1177 buf_addr = bp->b_un.b_addr;
1179 while (nbytes > 0) {
1180 offset_t off_in_window;
1181 size_t rem_in_window, copy_bytes;
1182 caddr_t raddr;
1184 rdop_map(rsp, offset, &map);
1186 off_in_window = offset - map.base;
1187 rem_in_window = map.size - off_in_window;
1189 raddr = map.vaddr + off_in_window;
1190 copy_bytes = MIN(nbytes, rem_in_window);
1192 mutex_enter(&rsp->rd_device_lock);
1193 if (reading) {
1194 (void) bcopy(raddr, buf_addr, copy_bytes);
1195 } else {
1196 (void) bcopy(buf_addr, raddr, copy_bytes);
1198 mutex_exit(&rsp->rd_device_lock);
1200 offset += copy_bytes;
1201 buf_addr += copy_bytes;
1202 nbytes -= copy_bytes;
1205 rdop_unmap(rsp, &map);
1209 * On Sparc, this function deals with both pseudo ramdisks and OBP ramdisks.
1210 * In the case where we freed the "bootarchive" ramdisk in bop_free_archive(),
1211 * we stop allowing access to the OBP ramdisks. To do so, we set the
1212 * bootops_obp_ramdisk_disabled flag to true, and we check if the operation
1213 * is for an OBP ramdisk. In this case we indicate an ENXIO error.
1215 static int
1216 rd_strategy(struct buf *bp)
1218 rd_devstate_t *rsp;
1219 offset_t offset;
1221 rsp = ddi_get_soft_state(rd_statep, getminor(bp->b_edev));
1222 offset = bp->b_blkno * DEV_BSIZE;
1224 if (rsp == NULL) {
1225 bp->b_error = ENXIO;
1226 bp->b_flags |= B_ERROR;
1227 } else if (offset >= rsp->rd_size) {
1228 bp->b_error = EINVAL;
1229 bp->b_flags |= B_ERROR;
1230 } else {
1231 size_t nbytes;
1233 if (rsp->rd_kstat) {
1234 mutex_enter(rsp->rd_kstat->ks_lock);
1235 kstat_runq_enter(KSTAT_IO_PTR(rsp->rd_kstat));
1236 mutex_exit(rsp->rd_kstat->ks_lock);
1239 nbytes = min(bp->b_bcount, rsp->rd_size - offset);
1241 rd_rw(rsp, bp, offset, nbytes);
1243 bp->b_resid = bp->b_bcount - nbytes;
1245 if (rsp->rd_kstat) {
1246 kstat_io_t *kioptr;
1248 mutex_enter(rsp->rd_kstat->ks_lock);
1249 kioptr = KSTAT_IO_PTR(rsp->rd_kstat);
1250 if (bp->b_flags & B_READ) {
1251 kioptr->nread += nbytes;
1252 kioptr->reads++;
1253 } else {
1254 kioptr->nwritten += nbytes;
1255 kioptr->writes++;
1257 kstat_runq_exit(kioptr);
1258 mutex_exit(rsp->rd_kstat->ks_lock);
1262 biodone(bp);
1263 return (0);
1266 /*ARGSUSED*/
1267 static int
1268 rd_read(dev_t dev, struct uio *uiop, cred_t *credp)
1270 rd_devstate_t *rsp;
1272 rsp = ddi_get_soft_state(rd_statep, getminor(dev));
1274 if (uiop->uio_offset >= rsp->rd_size)
1275 return (EINVAL);
1277 return (physio(rd_strategy, NULL, dev, B_READ, rd_minphys, uiop));
1280 /*ARGSUSED*/
1281 static int
1282 rd_write(dev_t dev, register struct uio *uiop, cred_t *credp)
1284 rd_devstate_t *rsp;
1286 rsp = ddi_get_soft_state(rd_statep, getminor(dev));
1288 if (uiop->uio_offset >= rsp->rd_size)
1289 return (EINVAL);
1291 return (physio(rd_strategy, NULL, dev, B_WRITE, rd_minphys, uiop));
1294 /*ARGSUSED*/
1295 static int
1296 rd_create_disk(dev_t dev, struct rd_ioctl *urip, int mode, int *rvalp)
1298 struct rd_ioctl kri;
1299 size_t size;
1300 rd_devstate_t *rsp;
1302 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) {
1303 return (EFAULT);
1306 kri.ri_name[RD_NAME_LEN] = '\0';
1308 size = kri.ri_size;
1309 if (size == 0) {
1310 return (EINVAL);
1312 size = ptob(btopr(size));
1314 mutex_enter(&rd_lock);
1316 if (rd_find_named_disk(kri.ri_name) != NULL) {
1317 mutex_exit(&rd_lock);
1318 return (EEXIST);
1321 rsp = rd_alloc_resources(kri.ri_name, &rd_pseudo_ops, 0, size, rd_dip);
1322 if (rsp == NULL) {
1323 mutex_exit(&rd_lock);
1324 return (EAGAIN);
1327 mutex_exit(&rd_lock);
1329 return (ddi_copyout(&kri, urip, sizeof (kri), mode) == -1 ? EFAULT : 0);
1332 /*ARGSUSED*/
1333 static int
1334 rd_delete_disk(dev_t dev, struct rd_ioctl *urip, int mode)
1336 struct rd_ioctl kri;
1337 rd_devstate_t *rsp;
1339 if (ddi_copyin(urip, &kri, sizeof (kri), mode) == -1) {
1340 return (EFAULT);
1343 kri.ri_name[RD_NAME_LEN] = '\0';
1345 mutex_enter(&rd_lock);
1347 rsp = rd_find_named_disk(kri.ri_name);
1348 if (rsp == NULL || rsp->rd_dip != rd_dip) {
1349 mutex_exit(&rd_lock);
1350 return (EINVAL);
1352 if (rd_is_open(rsp)) {
1353 mutex_exit(&rd_lock);
1354 return (EBUSY);
1357 rd_dealloc_resources(rsp);
1359 mutex_exit(&rd_lock);
1361 return (0);
1364 /*ARGSUSED*/
1365 static int
1366 rd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1368 minor_t minor;
1369 int error;
1370 enum dkio_state dkstate;
1371 rd_devstate_t *rsp;
1373 minor = getminor(dev);
1376 * Ramdisk ioctls only apply to the master device.
1378 if (minor == RD_CTL_MINOR) {
1379 struct rd_ioctl *rip = (struct rd_ioctl *)arg;
1382 * The query commands only need read-access - i.e., normal
1383 * users are allowed to do those on the controlling device
1384 * as long as they can open it read-only.
1386 switch (cmd) {
1387 case RD_CREATE_DISK:
1388 if ((mode & FWRITE) == 0)
1389 return (EPERM);
1390 return (rd_create_disk(dev, rip, mode, rvalp));
1392 case RD_DELETE_DISK:
1393 if ((mode & FWRITE) == 0)
1394 return (EPERM);
1395 return (rd_delete_disk(dev, rip, mode));
1397 default:
1398 return (EINVAL);
1402 rsp = ddi_get_soft_state(rd_statep, minor);
1403 if (rsp == NULL) {
1404 return (ENXIO);
1408 * These are for faking out utilities like newfs.
1410 switch (cmd) {
1411 case DKIOCGVTOC:
1412 switch (ddi_model_convert_from(mode & FMODELS)) {
1413 case DDI_MODEL_ILP32: {
1414 struct vtoc32 vtoc32;
1416 vtoctovtoc32(rsp->rd_vtoc, vtoc32);
1417 if (ddi_copyout(&vtoc32, (void *)arg,
1418 sizeof (struct vtoc32), mode))
1419 return (EFAULT);
1421 break;
1423 case DDI_MODEL_NONE:
1424 if (ddi_copyout(&rsp->rd_vtoc, (void *)arg,
1425 sizeof (struct vtoc), mode))
1426 return (EFAULT);
1427 break;
1429 return (0);
1430 case DKIOCINFO:
1431 error = ddi_copyout(&rsp->rd_ci, (void *)arg,
1432 sizeof (struct dk_cinfo), mode);
1433 if (error)
1434 return (EFAULT);
1435 return (0);
1436 case DKIOCG_VIRTGEOM:
1437 case DKIOCG_PHYGEOM:
1438 case DKIOCGGEOM:
1439 error = ddi_copyout(&rsp->rd_dkg, (void *)arg,
1440 sizeof (struct dk_geom), mode);
1441 if (error)
1442 return (EFAULT);
1443 return (0);
1444 case DKIOCSTATE:
1445 /* the file is always there */
1446 dkstate = DKIO_INSERTED;
1447 error = ddi_copyout(&dkstate, (void *)arg,
1448 sizeof (enum dkio_state), mode);
1449 if (error)
1450 return (EFAULT);
1451 return (0);
1452 default:
1453 return (ENOTTY);
1458 static struct cb_ops rd_cb_ops = {
1459 rd_open,
1460 rd_close,
1461 rd_strategy,
1462 nodev,
1463 nodev, /* dump */
1464 rd_read,
1465 rd_write,
1466 rd_ioctl,
1467 nodev, /* devmap */
1468 nodev, /* mmap */
1469 nodev, /* segmap */
1470 nochpoll, /* poll */
1471 ddi_prop_op,
1472 NULL,
1473 D_NEW | D_MP
1476 static struct dev_ops rd_ops = {
1477 DEVO_REV,
1479 rd_getinfo,
1480 nulldev, /* identify */
1481 nulldev, /* probe */
1482 rd_attach,
1483 rd_detach,
1484 nodev, /* reset */
1485 &rd_cb_ops,
1486 NULL,
1487 NULL,
1488 ddi_quiesce_not_needed, /* quiesce */
1492 extern struct mod_ops mod_driverops;
1494 static struct modldrv modldrv = {
1495 &mod_driverops,
1496 "ramdisk driver",
1497 &rd_ops
1500 static struct modlinkage modlinkage = {
1501 MODREV_1,
1502 &modldrv,
1507 _init(void)
1509 int e;
1511 if ((e = ddi_soft_state_init(&rd_statep,
1512 sizeof (rd_devstate_t), 0)) != 0) {
1513 return (e);
1516 mutex_init(&rd_lock, NULL, MUTEX_DRIVER, NULL);
1518 if ((e = mod_install(&modlinkage)) != 0) {
1519 mutex_destroy(&rd_lock);
1520 ddi_soft_state_fini(&rd_statep);
1523 return (e);
1527 _fini(void)
1529 int e;
1531 if ((e = mod_remove(&modlinkage)) != 0) {
1532 return (e);
1535 ddi_soft_state_fini(&rd_statep);
1536 mutex_destroy(&rd_lock);
1538 return (e);
1542 _info(struct modinfo *modinfop)
1544 return (mod_info(&modlinkage, modinfop));