4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 Andrey Sokolov
26 * Copyright 2016 Toomas Soome <tsoome@me.com>
30 * lofi (loopback file) driver - allows you to attach a file to a device,
31 * which can then be accessed through that device. The simple model is that
32 * you tell lofi to open a file, and then use the block device you get as
33 * you would any block device. lofi translates access to the block device
34 * into I/O on the underlying file. This is mostly useful for
35 * mounting images of filesystems.
37 * lofi is controlled through /dev/lofictl - this is the only device exported
38 * during attach, and is instance number 0. lofiadm communicates with lofi
39 * through ioctls on this device. When a file is attached to lofi, block and
40 * character devices are exported in /dev/lofi and /dev/rlofi. These devices
41 * are identified by lofi instance number, and the instance number is also used
42 * as the name in /dev/lofi.
44 * Virtual disks, or, labeled lofi, implements virtual disk support to
45 * support partition table and related tools. Such mappings will cause
46 * block and character devices to be exported in /dev/dsk and /dev/rdsk
49 * To support virtual disks, the instance number space is divided to two
50 * parts, upper part for instance number and lower part for minor number
51 * space to identify partitions and slices. The virtual disk support is
52 * implemented by stacking cmlb module. For virtual disks, the partition
53 * related ioctl calls are routed to cmlb module. Compression and encryption
54 * is not supported for virtual disks.
56 * Mapped devices are tracked with state structures handled with
57 * ddi_soft_state(9F) for simplicity.
59 * A file attached to lofi is opened when attached and not closed until
60 * explicitly detached from lofi. This seems more sensible than deferring
61 * the open until the /dev/lofi device is opened, for a number of reasons.
62 * One is that any failure is likely to be noticed by the person (or script)
63 * running lofiadm. Another is that it would be a security problem if the
64 * file was replaced by another one after being added but before being opened.
66 * The only hard part about lofi is the ioctls. In order to support things
67 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
68 * So it has to fake disk geometry and partition information. More may need
69 * to be faked if your favorite utility doesn't work and you think it should
70 * (fdformat doesn't work because it really wants to know the type of floppy
71 * controller to talk to, and that didn't seem easy to fake. Or possibly even
72 * necessary, since we have mkfs_pcfs now).
74 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
75 * support simulation of hotplug events, an optional force flag is provided.
76 * If a lofi device is open when a force detach is requested, then the
77 * underlying file is closed and any subsequent operations return EIO. When the
78 * device is closed for the last time, it will be cleaned up at that time. In
79 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
80 * detached but not removed.
82 * If detach was requested and lofi device is not open, we will perform
83 * unmap and remove the lofi instance.
85 * If the lofi device is open and the li_cleanup is set on ioctl request,
86 * we set ls_cleanup flag to notify the cleanup is requested, and the
87 * last lofi_close will perform the unmapping and this lofi instance will be
90 * If the lofi device is open and the li_force is set on ioctl request,
91 * we set ls_cleanup flag to notify the cleanup is requested,
92 * we also set ls_vp_closereq to notify IO tasks to return EIO on new
93 * IO requests and wait in process IO count to become 0, indicating there
94 * are no more IO requests. Since ls_cleanup is set, the last lofi_close
95 * will perform unmap and this lofi instance will be removed.
96 * See also lofi_unmap_file() for details.
98 * Once ls_cleanup is set for the instance, we do not allow lofi_open()
99 * calls to succeed and can have last lofi_close() to remove the instance.
103 * UFS logging. Mounting a UFS filesystem image "logging"
104 * works for basic copy testing but wedges during a build of ON through
105 * that image. Some deadlock in lufs holding the log mutex and then
106 * getting stuck on a buf. So for now, don't do that.
108 * Direct I/O. Since the filesystem data is being cached in the buffer
109 * cache, _and_ again in the underlying filesystem, it's tempting to
110 * enable direct I/O on the underlying file. Don't, because that deadlocks.
111 * I think to fix the cache-twice problem we might need filesystem support.
113 * Interesting things to do:
115 * Allow multiple files for each device. A poor-man's metadisk, basically.
117 * Pass-through ioctls on block devices. You can (though it's not
118 * documented), give lofi a block device as a file name. Then we shouldn't
119 * need to fake a geometry, however, it may be relevant if you're replacing
120 * metadisk, or using lofi to get crypto.
121 * It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
122 * and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
123 * In fact this even makes sense if you have lofi "above" metadisk.
126 * Each lofi device can have its own symmetric key and cipher.
127 * They are passed to us by lofiadm(1m) in the correct format for use
128 * with the misc/kcf crypto_* routines.
130 * Each block has its own IV, that is calculated in lofi_blk_mech(), based
131 * on the "master" key held in the lsp and the block number of the buffer.
134 #include <sys/types.h>
135 #include <netinet/in.h>
136 #include <sys/sysmacros.h>
138 #include <sys/kmem.h>
139 #include <sys/cred.h>
140 #include <sys/mman.h>
141 #include <sys/errno.h>
142 #include <sys/aio_req.h>
143 #include <sys/stat.h>
144 #include <sys/file.h>
145 #include <sys/modctl.h>
146 #include <sys/conf.h>
147 #include <sys/debug.h>
148 #include <sys/vnode.h>
149 #include <sys/lofi.h>
150 #include <sys/lofi_impl.h> /* for cache structure */
151 #include <sys/fcntl.h>
152 #include <sys/pathname.h>
153 #include <sys/filio.h>
154 #include <sys/fdio.h>
155 #include <sys/open.h>
156 #include <sys/disp.h>
157 #include <vm/seg_map.h>
159 #include <sys/sunddi.h>
160 #include <sys/zmod.h>
161 #include <sys/id_space.h>
162 #include <sys/mkdev.h>
163 #include <sys/crypto/common.h>
164 #include <sys/crypto/api.h>
165 #include <sys/rctl.h>
166 #include <sys/vtoc.h>
167 #include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */
168 #include <sys/scsi/impl/uscsi.h>
169 #include <sys/sysevent/dev.h>
172 #define NBLOCKS_PROP_NAME "Nblocks"
173 #define SIZE_PROP_NAME "Size"
174 #define ZONE_PROP_NAME "zone"
176 #define SETUP_C_DATA(cd, buf, len) \
177 (cd).cd_format = CRYPTO_DATA_RAW; \
178 (cd).cd_offset = 0; \
179 (cd).cd_miscdata = NULL; \
180 (cd).cd_length = (len); \
181 (cd).cd_raw.iov_base = (buf); \
182 (cd).cd_raw.iov_len = (len);
184 #define UIO_CHECK(uio) \
185 if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
186 ((uio)->uio_resid % DEV_BSIZE) != 0) { \
190 #define LOFI_TIMEOUT 30
192 static void *lofi_statep
;
193 static kmutex_t lofi_lock
; /* state lock */
194 static id_space_t
*lofi_id
; /* lofi ID values */
195 static list_t lofi_list
;
196 static zone_key_t lofi_zone_key
;
199 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
200 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
201 * high. If we want to be assured that the underlying device is always busy,
202 * we must be sure that the number of bytes enqueued when the number of
203 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
204 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
205 * set maxalloc to be the maximum throughput (in bytes per second) of the
206 * underlying device divided by the minimum I/O size. We assume a realistic
207 * maximum throughput of one hundred megabytes per second; we set maxalloc on
208 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
210 static int lofi_taskq_maxalloc
= 104857600 / DEV_BSIZE
;
211 static int lofi_taskq_nthreads
= 4; /* # of taskq threads per device */
213 const char lofi_crypto_magic
[6] = LOFI_CRYPTO_MAGIC
;
216 * To avoid decompressing data in a compressed segment multiple times
217 * when accessing small parts of a segment's data, we cache and reuse
218 * the uncompressed segment's data.
220 * A single cached segment is sufficient to avoid lots of duplicate
221 * segment decompress operations. A small cache size also reduces the
224 * lofi_max_comp_cache is the maximum number of decompressed data segments
225 * cached for each compressed lofi image. It can be set to 0 to disable
229 uint32_t lofi_max_comp_cache
= 1;
231 static int gzip_decompress(void *src
, size_t srclen
, void *dst
,
232 size_t *destlen
, int level
);
234 static int lzma_decompress(void *src
, size_t srclen
, void *dst
,
235 size_t *dstlen
, int level
);
237 lofi_compress_info_t lofi_compress_table
[LOFI_COMPRESS_FUNCTIONS
] = {
238 {gzip_decompress
, NULL
, 6, "gzip"}, /* default */
239 {gzip_decompress
, NULL
, 6, "gzip-6"},
240 {gzip_decompress
, NULL
, 9, "gzip-9"},
241 {lzma_decompress
, NULL
, 0, "lzma"}
244 static void lofi_strategy_task(void *);
245 static int lofi_tg_rdwr(dev_info_t
*, uchar_t
, void *, diskaddr_t
,
247 static int lofi_tg_getinfo(dev_info_t
*, int, void *, void *);
249 struct cmlb_tg_ops lofi_tg_ops
= {
257 *SzAlloc(void *p
, size_t size
)
259 return (kmem_alloc(size
, KM_SLEEP
));
264 SzFree(void *p
, void *address
, size_t size
)
266 kmem_free(address
, size
);
269 static ISzAlloc g_Alloc
= { SzAlloc
, SzFree
};
272 * Free data referenced by the linked list of cached uncompressed
276 lofi_free_comp_cache(struct lofi_state
*lsp
)
278 struct lofi_comp_cache
*lc
;
280 while ((lc
= list_remove_head(&lsp
->ls_comp_cache
)) != NULL
) {
281 kmem_free(lc
->lc_data
, lsp
->ls_uncomp_seg_sz
);
282 kmem_free(lc
, sizeof (struct lofi_comp_cache
));
283 lsp
->ls_comp_cache_count
--;
285 ASSERT(lsp
->ls_comp_cache_count
== 0);
289 is_opened(struct lofi_state
*lsp
)
292 boolean_t last
= B_TRUE
;
294 ASSERT(MUTEX_HELD(&lofi_lock
));
295 for (i
= 0; i
< LOFI_PART_MAX
; i
++) {
296 if (lsp
->ls_open_lyr
[i
]) {
302 for (i
= 0; last
&& (i
< OTYP_LYR
); i
++) {
303 if (lsp
->ls_open_reg
[i
]) {
312 lofi_set_cleanup(struct lofi_state
*lsp
)
314 ASSERT(MUTEX_HELD(&lofi_lock
));
316 lsp
->ls_cleanup
= B_TRUE
;
318 /* wake up any threads waiting on dkiocstate */
319 cv_broadcast(&lsp
->ls_vp_cv
);
323 lofi_free_crypto(struct lofi_state
*lsp
)
325 ASSERT(MUTEX_HELD(&lofi_lock
));
327 if (lsp
->ls_crypto_enabled
) {
329 * Clean up the crypto state so that it doesn't hang around
330 * in memory after we are done with it.
332 if (lsp
->ls_key
.ck_data
!= NULL
) {
333 bzero(lsp
->ls_key
.ck_data
,
334 CRYPTO_BITS2BYTES(lsp
->ls_key
.ck_length
));
335 kmem_free(lsp
->ls_key
.ck_data
,
336 CRYPTO_BITS2BYTES(lsp
->ls_key
.ck_length
));
337 lsp
->ls_key
.ck_data
= NULL
;
338 lsp
->ls_key
.ck_length
= 0;
341 if (lsp
->ls_mech
.cm_param
!= NULL
) {
342 kmem_free(lsp
->ls_mech
.cm_param
,
343 lsp
->ls_mech
.cm_param_len
);
344 lsp
->ls_mech
.cm_param
= NULL
;
345 lsp
->ls_mech
.cm_param_len
= 0;
348 if (lsp
->ls_iv_mech
.cm_param
!= NULL
) {
349 kmem_free(lsp
->ls_iv_mech
.cm_param
,
350 lsp
->ls_iv_mech
.cm_param_len
);
351 lsp
->ls_iv_mech
.cm_param
= NULL
;
352 lsp
->ls_iv_mech
.cm_param_len
= 0;
355 mutex_destroy(&lsp
->ls_crypto_lock
);
361 lofi_tg_rdwr(dev_info_t
*dip
, uchar_t cmd
, void *bufaddr
, diskaddr_t start
,
362 size_t length
, void *tg_cookie
)
364 struct lofi_state
*lsp
;
369 instance
= ddi_get_instance(dip
);
370 if (instance
== 0) /* control node does not have disk */
373 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
378 if (cmd
!= TG_READ
&& cmd
!= TG_WRITE
)
382 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
384 mutex_enter(&lsp
->ls_vp_lock
);
385 while (lsp
->ls_vp_ready
== B_FALSE
)
386 cv_wait(&lsp
->ls_vp_cv
, &lsp
->ls_vp_lock
);
387 mutex_exit(&lsp
->ls_vp_lock
);
389 if (P2PHASE(length
, (1U << lsp
->ls_lbshift
)) != 0) {
390 /* We can only transfer whole blocks at a time! */
394 bp
= getrbuf(KM_SLEEP
);
396 if (cmd
== TG_READ
) {
397 bp
->b_flags
= B_READ
;
399 if (lsp
->ls_readonly
== B_TRUE
) {
403 bp
->b_flags
= B_WRITE
;
406 bp
->b_un
.b_addr
= bufaddr
;
407 bp
->b_bcount
= length
;
408 bp
->b_lblkno
= start
;
409 bp
->b_private
= NULL
;
410 bp
->b_edev
= lsp
->ls_dev
;
413 mutex_enter(lsp
->ls_kstat
->ks_lock
);
414 kstat_waitq_enter(KSTAT_IO_PTR(lsp
->ls_kstat
));
415 mutex_exit(lsp
->ls_kstat
->ks_lock
);
417 (void) taskq_dispatch(lsp
->ls_taskq
, lofi_strategy_task
, bp
, KM_SLEEP
);
426 * Get device geometry info for cmlb.
428 * We have mapped disk image as virtual block device and have to report
429 * physical/virtual geometry to cmlb.
431 * So we have two principal cases:
432 * 1. Uninitialised image without any existing labels,
433 * for this case we fabricate the data based on mapped image.
434 * 2. Image with existing label information.
435 * Since we have no information how the image was created (it may be
436 * dump from some physical device), we need to rely on label information
437 * from image, or we get "corrupted label" errors.
438 * NOTE: label can be MBR, MBR+SMI, GPT
441 lofi_tg_getinfo(dev_info_t
*dip
, int cmd
, void *arg
, void *tg_cookie
)
443 struct lofi_state
*lsp
;
447 _NOTE(ARGUNUSED(tg_cookie
));
448 instance
= ddi_get_instance(dip
);
449 if (instance
== 0) /* control device has no storage */
452 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
458 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
460 * When mapping is created, new lofi instance is created and
461 * lofi_attach() will call cmlb_attach() as part of the procedure
462 * to set the mapping up. This chain of events will happen in
464 * Since cmlb_attach() will call lofi_tg_getinfo to get
465 * capacity, we return error on that call if cookie is set,
466 * otherwise lofi_attach will be stuck as the mapping is not yet
467 * finalized and lofi is not yet ready.
468 * Note, such error is not fatal for cmlb, as the label setup
469 * will be finalized when cmlb_validate() is called.
471 mutex_enter(&lsp
->ls_vp_lock
);
472 if (tg_cookie
!= NULL
&& lsp
->ls_vp_ready
== B_FALSE
) {
473 mutex_exit(&lsp
->ls_vp_lock
);
476 while (lsp
->ls_vp_ready
== B_FALSE
)
477 cv_wait(&lsp
->ls_vp_cv
, &lsp
->ls_vp_lock
);
478 mutex_exit(&lsp
->ls_vp_lock
);
480 ashift
= lsp
->ls_lbshift
;
483 case TG_GETPHYGEOM
: {
484 cmlb_geom_t
*geomp
= arg
;
487 (lsp
->ls_vp_size
- lsp
->ls_crypto_offset
) >> ashift
;
488 geomp
->g_nsect
= lsp
->ls_dkg
.dkg_nsect
;
489 geomp
->g_nhead
= lsp
->ls_dkg
.dkg_nhead
;
490 geomp
->g_acyl
= lsp
->ls_dkg
.dkg_acyl
;
491 geomp
->g_ncyl
= lsp
->ls_dkg
.dkg_ncyl
;
492 geomp
->g_secsize
= (1U << ashift
);
493 geomp
->g_intrlv
= lsp
->ls_dkg
.dkg_intrlv
;
494 geomp
->g_rpm
= lsp
->ls_dkg
.dkg_rpm
;
500 (lsp
->ls_vp_size
- lsp
->ls_crypto_offset
) >> ashift
;
503 case TG_GETBLOCKSIZE
:
504 *(uint32_t *)arg
= (1U << ashift
);
508 tg_attribute_t
*tgattr
= arg
;
510 tgattr
->media_is_writable
= !lsp
->ls_readonly
;
511 tgattr
->media_is_solid_state
= B_FALSE
;
512 tgattr
->media_is_rotational
= B_FALSE
;
522 lofi_destroy(struct lofi_state
*lsp
, cred_t
*credp
)
524 int id
= LOFI_MINOR2ID(getminor(lsp
->ls_dev
));
527 ASSERT(MUTEX_HELD(&lofi_lock
));
530 * Before we can start to release the other resources,
531 * make sure we have all tasks completed and taskq removed.
533 if (lsp
->ls_taskq
!= NULL
) {
534 taskq_destroy(lsp
->ls_taskq
);
535 lsp
->ls_taskq
= NULL
;
538 list_remove(&lofi_list
, lsp
);
540 lofi_free_crypto(lsp
);
543 * Free pre-allocated compressed buffers
545 if (lsp
->ls_comp_bufs
!= NULL
) {
546 for (i
= 0; i
< lofi_taskq_nthreads
; i
++) {
547 if (lsp
->ls_comp_bufs
[i
].bufsize
> 0)
548 kmem_free(lsp
->ls_comp_bufs
[i
].buf
,
549 lsp
->ls_comp_bufs
[i
].bufsize
);
551 kmem_free(lsp
->ls_comp_bufs
,
552 sizeof (struct compbuf
) * lofi_taskq_nthreads
);
555 if (lsp
->ls_vp
!= NULL
) {
556 (void) fop_putpage(lsp
->ls_vp
, 0, 0, B_INVAL
, credp
, NULL
);
557 (void) fop_close(lsp
->ls_vp
, lsp
->ls_openflag
,
561 if (lsp
->ls_stacked_vp
!= lsp
->ls_vp
)
562 VN_RELE(lsp
->ls_stacked_vp
);
563 lsp
->ls_vp
= lsp
->ls_stacked_vp
= NULL
;
565 if (lsp
->ls_kstat
!= NULL
) {
566 kstat_delete(lsp
->ls_kstat
);
567 lsp
->ls_kstat
= NULL
;
571 * Free cached decompressed segment data
573 lofi_free_comp_cache(lsp
);
574 list_destroy(&lsp
->ls_comp_cache
);
576 if (lsp
->ls_uncomp_seg_sz
> 0) {
577 kmem_free(lsp
->ls_comp_index_data
, lsp
->ls_comp_index_data_sz
);
578 lsp
->ls_uncomp_seg_sz
= 0;
581 rctl_decr_lofi(lsp
->ls_zone
.zref_zone
, 1);
582 zone_rele_ref(&lsp
->ls_zone
, ZONE_REF_LOFI
);
584 mutex_destroy(&lsp
->ls_comp_cache_lock
);
585 mutex_destroy(&lsp
->ls_comp_bufs_lock
);
586 mutex_destroy(&lsp
->ls_kstat_lock
);
587 mutex_destroy(&lsp
->ls_vp_lock
);
588 cv_destroy(&lsp
->ls_vp_cv
);
589 lsp
->ls_vp_ready
= B_FALSE
;
590 lsp
->ls_vp_closereq
= B_FALSE
;
592 ASSERT(ddi_get_soft_state(lofi_statep
, id
) == lsp
);
593 (void) ndi_devi_offline(lsp
->ls_dip
, NDI_DEVI_REMOVE
);
594 id_free(lofi_id
, id
);
598 lofi_free_dev(struct lofi_state
*lsp
)
600 ASSERT(MUTEX_HELD(&lofi_lock
));
602 if (lsp
->ls_cmlbhandle
!= NULL
) {
603 cmlb_invalidate(lsp
->ls_cmlbhandle
, 0);
604 cmlb_detach(lsp
->ls_cmlbhandle
, 0);
605 cmlb_free_handle(&lsp
->ls_cmlbhandle
);
606 lsp
->ls_cmlbhandle
= NULL
;
608 (void) ddi_prop_remove_all(lsp
->ls_dip
);
609 ddi_remove_minor_node(lsp
->ls_dip
, NULL
);
614 lofi_zone_shutdown(zoneid_t zoneid
, void *arg
)
616 struct lofi_state
*lsp
;
617 struct lofi_state
*next
;
619 mutex_enter(&lofi_lock
);
621 for (lsp
= list_head(&lofi_list
); lsp
!= NULL
; lsp
= next
) {
623 /* lofi_destroy() frees lsp */
624 next
= list_next(&lofi_list
, lsp
);
626 if (lsp
->ls_zone
.zref_zone
->zone_id
!= zoneid
)
630 * No in-zone processes are running, but something has this
631 * open. It's either a global zone process, or a lofi
632 * mount. In either case we set ls_cleanup so the last
633 * user destroys the device.
635 if (is_opened(lsp
)) {
636 lofi_set_cleanup(lsp
);
639 lofi_destroy(lsp
, kcred
);
643 mutex_exit(&lofi_lock
);
648 lofi_open(dev_t
*devp
, int flag
, int otyp
, struct cred
*credp
)
657 struct lofi_state
*lsp
;
662 ndelay
= (flag
& (FNDELAY
| FNONBLOCK
)) ? B_TRUE
: B_FALSE
;
665 * lofiadm -a /dev/lofi/1 gets us here.
667 if (mutex_owner(&lofi_lock
) == curthread
)
670 mutex_enter(&lofi_lock
);
672 id
= LOFI_MINOR2ID(getminor(*devp
));
673 part
= LOFI_PART(getminor(*devp
));
676 /* master control device */
678 mutex_exit(&lofi_lock
);
682 /* otherwise, the mapping should already exist */
683 lsp
= ddi_get_soft_state(lofi_statep
, id
);
685 mutex_exit(&lofi_lock
);
689 if (lsp
->ls_cleanup
== B_TRUE
) {
690 mutex_exit(&lofi_lock
);
694 if (lsp
->ls_vp
== NULL
) {
695 mutex_exit(&lofi_lock
);
699 if (lsp
->ls_readonly
&& (flag
& FWRITE
)) {
700 mutex_exit(&lofi_lock
);
704 if ((lsp
->ls_open_excl
) & (mask
)) {
705 mutex_exit(&lofi_lock
);
710 if (lsp
->ls_open_lyr
[part
]) {
711 mutex_exit(&lofi_lock
);
714 for (int i
= 0; i
< OTYP_LYR
; i
++) {
715 if (lsp
->ls_open_reg
[i
] & mask
) {
716 mutex_exit(&lofi_lock
);
722 if (lsp
->ls_cmlbhandle
!= NULL
) {
723 if (cmlb_validate(lsp
->ls_cmlbhandle
, 0, 0) != 0) {
725 * non-blocking opens are allowed to succeed to
726 * support format and fdisk to create partitioning.
729 mutex_exit(&lofi_lock
);
732 } else if (cmlb_partinfo(lsp
->ls_cmlbhandle
, part
, &nblks
, &lba
,
733 NULL
, NULL
, 0) == 0) {
734 if ((!nblks
) && ((!ndelay
) || (otyp
!= OTYP_CHR
))) {
735 mutex_exit(&lofi_lock
);
738 } else if (!ndelay
) {
739 mutex_exit(&lofi_lock
);
744 if (otyp
== OTYP_LYR
) {
745 lsp
->ls_open_lyr
[part
]++;
747 lsp
->ls_open_reg
[otyp
] |= mask
;
750 lsp
->ls_open_excl
|= mask
;
753 mutex_exit(&lofi_lock
);
759 lofi_close(dev_t dev
, int flag
, int otyp
, struct cred
*credp
)
764 struct lofi_state
*lsp
;
766 id
= LOFI_MINOR2ID(getminor(dev
));
767 part
= LOFI_PART(getminor(dev
));
770 mutex_enter(&lofi_lock
);
771 lsp
= ddi_get_soft_state(lofi_statep
, id
);
773 mutex_exit(&lofi_lock
);
778 mutex_exit(&lofi_lock
);
782 if (lsp
->ls_open_excl
& mask
)
783 lsp
->ls_open_excl
&= ~mask
;
785 if (otyp
== OTYP_LYR
) {
786 lsp
->ls_open_lyr
[part
]--;
788 lsp
->ls_open_reg
[otyp
] &= ~mask
;
792 * If we forcibly closed the underlying device (li_force), or
793 * asked for cleanup (li_cleanup), finish up if we're the last
796 if (!is_opened(lsp
) &&
797 (lsp
->ls_cleanup
== B_TRUE
|| lsp
->ls_vp
== NULL
)) {
799 lofi_destroy(lsp
, credp
);
802 mutex_exit(&lofi_lock
);
807 * Sets the mechanism's initialization vector (IV) if one is needed.
808 * The IV is computed from the data block number. lsp->ls_mech is
810 * lsp->ls_mech.cm_param_len is set to the IV len.
811 * lsp->ls_mech.cm_param is set to the IV.
814 lofi_blk_mech(struct lofi_state
*lsp
, longlong_t lblkno
)
824 ASSERT(MUTEX_HELD(&lsp
->ls_crypto_lock
));
827 return (CRYPTO_DEVICE_ERROR
);
829 /* lsp->ls_mech.cm_param{_len} has already been set for static iv */
830 if (lsp
->ls_iv_type
== IVM_NONE
) {
831 return (CRYPTO_SUCCESS
);
835 * if kmem already alloced from previous call and it's the same size
836 * we need now, just recycle it; allocate new kmem only if we have to
838 if (lsp
->ls_mech
.cm_param
== NULL
||
839 lsp
->ls_mech
.cm_param_len
!= lsp
->ls_iv_len
) {
840 iv_len
= lsp
->ls_iv_len
;
841 iv
= kmem_zalloc(iv_len
, KM_SLEEP
);
843 iv_len
= lsp
->ls_mech
.cm_param_len
;
844 iv
= lsp
->ls_mech
.cm_param
;
848 switch (lsp
->ls_iv_type
) {
850 /* iv is not static, lblkno changes each time */
852 datasz
= sizeof (lblkno
);
861 * write blkno into the iv buffer padded on the left in case
862 * blkno ever grows bigger than its current longlong_t size
863 * or a variation other than blkno is used for the iv data
865 min
= MIN(datasz
, iv_len
);
866 bcopy(data
, iv
+ (iv_len
- min
), min
);
868 /* encrypt the data in-place to get the IV */
869 SETUP_C_DATA(cdata
, iv
, iv_len
);
871 ret
= crypto_encrypt(&lsp
->ls_iv_mech
, &cdata
, &lsp
->ls_key
,
873 if (ret
!= CRYPTO_SUCCESS
) {
874 cmn_err(CE_WARN
, "failed to create iv for block %lld: (0x%x)",
876 if (lsp
->ls_mech
.cm_param
!= iv
)
877 kmem_free(iv
, iv_len
);
882 /* clean up the iv from the last computation */
883 if (lsp
->ls_mech
.cm_param
!= NULL
&& lsp
->ls_mech
.cm_param
!= iv
)
884 kmem_free(lsp
->ls_mech
.cm_param
, lsp
->ls_mech
.cm_param_len
);
886 lsp
->ls_mech
.cm_param_len
= iv_len
;
887 lsp
->ls_mech
.cm_param
= iv
;
889 return (CRYPTO_SUCCESS
);
893 * Performs encryption and decryption of a chunk of data of size "len",
894 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
898 lofi_crypto(struct lofi_state
*lsp
, struct buf
*bp
, caddr_t plaintext
,
899 caddr_t ciphertext
, size_t len
, boolean_t op_encrypt
)
904 longlong_t lblkno
= bp
->b_lblkno
;
906 mutex_enter(&lsp
->ls_crypto_lock
);
909 * though we could encrypt/decrypt entire "len" chunk of data, we need
910 * to break it into DEV_BSIZE pieces to capture blkno incrementing
912 SETUP_C_DATA(cdata
, plaintext
, len
);
913 cdata
.cd_length
= DEV_BSIZE
;
914 if (ciphertext
!= NULL
) { /* not in-place crypto */
915 SETUP_C_DATA(wdata
, ciphertext
, len
);
916 wdata
.cd_length
= DEV_BSIZE
;
920 ret
= lofi_blk_mech(lsp
, lblkno
);
921 if (ret
!= CRYPTO_SUCCESS
)
925 ret
= crypto_encrypt(&lsp
->ls_mech
, &cdata
,
927 ((ciphertext
!= NULL
) ? &wdata
: NULL
), NULL
);
929 ret
= crypto_decrypt(&lsp
->ls_mech
, &cdata
,
931 ((ciphertext
!= NULL
) ? &wdata
: NULL
), NULL
);
934 cdata
.cd_offset
+= DEV_BSIZE
;
935 if (ciphertext
!= NULL
)
936 wdata
.cd_offset
+= DEV_BSIZE
;
938 } while (ret
== CRYPTO_SUCCESS
&& cdata
.cd_offset
< len
);
940 mutex_exit(&lsp
->ls_crypto_lock
);
942 if (ret
!= CRYPTO_SUCCESS
) {
943 cmn_err(CE_WARN
, "%s failed for block %lld: (0x%x)",
944 op_encrypt
? "crypto_encrypt()" : "crypto_decrypt()",
955 lofi_rdwr(caddr_t bufaddr
, offset_t offset
, struct buf
*bp
,
956 struct lofi_state
*lsp
, size_t len
, int method
, caddr_t bcopy_locn
)
963 * Handles reads/writes for both plain and encrypted lofi
964 * Note: offset is already shifted by lsp->ls_crypto_offset
968 isread
= bp
->b_flags
& B_READ
;
970 if (method
== RDWR_BCOPY
) {
971 /* DO NOT update bp->b_resid for bcopy */
972 bcopy(bcopy_locn
, bufaddr
, len
);
974 } else { /* RDWR_RAW */
975 error
= vn_rdwr(UIO_READ
, lsp
->ls_vp
, bufaddr
, len
,
976 offset
, UIO_SYSSPACE
, 0, RLIM64_INFINITY
, kcred
,
980 if (lsp
->ls_crypto_enabled
&& error
== 0) {
981 if (lofi_crypto(lsp
, bp
, bufaddr
, NULL
, len
,
982 B_FALSE
) != CRYPTO_SUCCESS
) {
984 * XXX: original code didn't set residual
985 * back to len because no error was expected
986 * from bcopy() if encryption is not enabled
988 if (method
!= RDWR_BCOPY
)
995 void *iobuf
= bufaddr
;
997 if (lsp
->ls_crypto_enabled
) {
998 /* don't do in-place crypto to keep bufaddr intact */
999 iobuf
= kmem_alloc(len
, KM_SLEEP
);
1000 if (lofi_crypto(lsp
, bp
, bufaddr
, iobuf
, len
,
1001 B_TRUE
) != CRYPTO_SUCCESS
) {
1002 kmem_free(iobuf
, len
);
1003 if (method
!= RDWR_BCOPY
)
1008 if (method
== RDWR_BCOPY
) {
1009 /* DO NOT update bp->b_resid for bcopy */
1010 bcopy(iobuf
, bcopy_locn
, len
);
1012 } else { /* RDWR_RAW */
1013 error
= vn_rdwr(UIO_WRITE
, lsp
->ls_vp
, iobuf
, len
,
1014 offset
, UIO_SYSSPACE
, 0, RLIM64_INFINITY
, kcred
,
1016 bp
->b_resid
= resid
;
1018 if (lsp
->ls_crypto_enabled
) {
1019 kmem_free(iobuf
, len
);
1026 lofi_mapped_rdwr(caddr_t bufaddr
, offset_t offset
, struct buf
*bp
,
1027 struct lofi_state
*lsp
)
1030 offset_t alignedoffset
, mapoffset
;
1040 * Note: offset is already shifted by lsp->ls_crypto_offset
1041 * when it gets here.
1043 if (lsp
->ls_crypto_enabled
)
1044 ASSERT(lsp
->ls_vp_comp_size
== lsp
->ls_vp_size
);
1047 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
1048 * an 8K boundary, but the buf transfer address may not be
1049 * aligned on more than a 512-byte boundary (we don't enforce
1050 * that even though we could). This matters since the initial
1051 * part of the transfer may not start at offset 0 within the
1052 * segmap'd chunk. So we have to compensate for that with
1053 * 'mapoffset'. Subsequent chunks always start off at the
1054 * beginning, and the last is capped by b_resid
1056 * Visually, where "|" represents page map boundaries:
1057 * alignedoffset (mapaddr begins at this segmap boundary)
1058 * | offset (from beginning of file)
1061 * ===|====X========|====...======|========X====|====
1062 * /-------------...---------------/
1063 * ^ bp->b_bcount/bp->b_resid at start
1064 * /----/--------/----...------/--------/
1066 * | | | | nth xfersize (<= MAXBSIZE)
1067 * | | 2nd thru n-1st xfersize (= MAXBSIZE)
1068 * | 1st xfersize (<= MAXBSIZE)
1069 * mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
1071 * Notes: "alignedoffset" is "offset" rounded down to nearest
1072 * MAXBSIZE boundary. "len" is next page boundary of size
1073 * PAGESIZE after "alignedoffset".
1075 mapoffset
= offset
& MAXBOFFSET
;
1076 alignedoffset
= offset
- mapoffset
;
1077 bp
->b_resid
= bp
->b_bcount
;
1078 isread
= bp
->b_flags
& B_READ
;
1079 srw
= isread
? S_READ
: S_WRITE
;
1081 xfersize
= MIN(lsp
->ls_vp_comp_size
- offset
,
1082 MIN(MAXBSIZE
- mapoffset
, bp
->b_resid
));
1083 len
= roundup(mapoffset
+ xfersize
, PAGESIZE
);
1084 mapaddr
= segmap_getmapflt(segkmap
, lsp
->ls_vp
,
1085 alignedoffset
, MAXBSIZE
, 1, srw
);
1087 * Now fault in the pages. This lets us check
1088 * for errors before we reference mapaddr and
1089 * try to resolve the fault in bcopy (which would
1090 * panic instead). And this can easily happen,
1091 * particularly if you've lofi'd a file over NFS
1092 * and someone deletes the file on the server.
1094 error
= segmap_fault(kas
.a_hat
, segkmap
, mapaddr
,
1095 len
, F_SOFTLOCK
, srw
);
1097 (void) segmap_release(segkmap
, mapaddr
, 0);
1098 if (FC_CODE(error
) == FC_OBJERR
)
1099 error
= FC_ERRNO(error
);
1104 /* error may be non-zero for encrypted lofi */
1105 error
= lofi_rdwr(bufaddr
, 0, bp
, lsp
, xfersize
,
1106 RDWR_BCOPY
, mapaddr
+ mapoffset
);
1108 bp
->b_resid
-= xfersize
;
1109 bufaddr
+= xfersize
;
1116 * If we're reading an entire page starting
1117 * at a page boundary, there's a good chance
1118 * we won't need it again. Put it on the
1119 * head of the freelist.
1121 if (mapoffset
== 0 && xfersize
== MAXBSIZE
)
1122 smflags
|= SM_DONTNEED
;
1125 * Write back good pages, it is okay to
1126 * always release asynchronous here as we'll
1127 * follow with fop_fsync for B_SYNC buffers.
1130 smflags
|= SM_WRITE
| SM_ASYNC
;
1132 (void) segmap_fault(kas
.a_hat
, segkmap
, mapaddr
,
1133 len
, F_SOFTUNLOCK
, srw
);
1134 save_error
= segmap_release(segkmap
, mapaddr
, smflags
);
1137 /* only the first map may start partial */
1139 alignedoffset
+= MAXBSIZE
;
1140 } while ((error
== 0) && (bp
->b_resid
> 0) &&
1141 (offset
< lsp
->ls_vp_comp_size
));
1147 * Check if segment seg_index is present in the decompressed segment
1150 * Returns a pointer to the decompressed segment data cache entry if
1151 * found, and NULL when decompressed data for this segment is not yet
1154 static struct lofi_comp_cache
*
1155 lofi_find_comp_data(struct lofi_state
*lsp
, uint64_t seg_index
)
1157 struct lofi_comp_cache
*lc
;
1159 ASSERT(MUTEX_HELD(&lsp
->ls_comp_cache_lock
));
1161 for (lc
= list_head(&lsp
->ls_comp_cache
); lc
!= NULL
;
1162 lc
= list_next(&lsp
->ls_comp_cache
, lc
)) {
1163 if (lc
->lc_index
== seg_index
) {
1165 * Decompressed segment data was found in the
1168 * The cache uses an LRU replacement strategy;
1169 * move the entry to head of list.
1171 list_remove(&lsp
->ls_comp_cache
, lc
);
1172 list_insert_head(&lsp
->ls_comp_cache
, lc
);
1180 * Add the data for a decompressed segment at segment index
1181 * seg_index to the cache of the decompressed segments.
1183 * Returns a pointer to the cache element structure in case
1184 * the data was added to the cache; returns NULL when the data
1187 static struct lofi_comp_cache
*
1188 lofi_add_comp_data(struct lofi_state
*lsp
, uint64_t seg_index
,
1191 struct lofi_comp_cache
*lc
;
1193 ASSERT(MUTEX_HELD(&lsp
->ls_comp_cache_lock
));
1195 while (lsp
->ls_comp_cache_count
> lofi_max_comp_cache
) {
1196 lc
= list_remove_tail(&lsp
->ls_comp_cache
);
1198 kmem_free(lc
->lc_data
, lsp
->ls_uncomp_seg_sz
);
1199 kmem_free(lc
, sizeof (struct lofi_comp_cache
));
1200 lsp
->ls_comp_cache_count
--;
1204 * Do not cache when disabled by tunable variable
1206 if (lofi_max_comp_cache
== 0)
1210 * When the cache has not yet reached the maximum allowed
1211 * number of segments, allocate a new cache element.
1212 * Otherwise the cache is full; reuse the last list element
1213 * (LRU) for caching the decompressed segment data.
1215 * The cache element for the new decompressed segment data is
1216 * added to the head of the list.
1218 if (lsp
->ls_comp_cache_count
< lofi_max_comp_cache
) {
1219 lc
= kmem_alloc(sizeof (struct lofi_comp_cache
), KM_SLEEP
);
1221 list_insert_head(&lsp
->ls_comp_cache
, lc
);
1222 lsp
->ls_comp_cache_count
++;
1224 lc
= list_remove_tail(&lsp
->ls_comp_cache
);
1227 list_insert_head(&lsp
->ls_comp_cache
, lc
);
1231 * Free old uncompressed segment data when reusing a cache
1234 if (lc
->lc_data
!= NULL
)
1235 kmem_free(lc
->lc_data
, lsp
->ls_uncomp_seg_sz
);
1238 lc
->lc_index
= seg_index
;
1245 gzip_decompress(void *src
, size_t srclen
, void *dst
,
1246 size_t *dstlen
, int level
)
1248 ASSERT(*dstlen
>= srclen
);
1250 if (z_uncompress(dst
, dstlen
, src
, srclen
) != Z_OK
)
1255 #define LZMA_HEADER_SIZE (LZMA_PROPS_SIZE + 8)
1258 lzma_decompress(void *src
, size_t srclen
, void *dst
,
1259 size_t *dstlen
, int level
)
1265 insizepure
= srclen
- LZMA_HEADER_SIZE
;
1266 actual_src
= (void *)((Byte
*)src
+ LZMA_HEADER_SIZE
);
1268 if (LzmaDecode((Byte
*)dst
, (size_t *)dstlen
,
1269 (const Byte
*)actual_src
, &insizepure
,
1270 (const Byte
*)src
, LZMA_PROPS_SIZE
, LZMA_FINISH_ANY
, &status
,
1271 &g_Alloc
) != SZ_OK
) {
1278 * This is basically what strategy used to be before we found we
1279 * needed task queues.
1282 lofi_strategy_task(void *arg
)
1284 struct buf
*bp
= (struct buf
*)arg
;
1287 struct lofi_state
*lsp
;
1292 boolean_t bufinited
= B_FALSE
;
1294 lsp
= ddi_get_soft_state(lofi_statep
,
1295 LOFI_MINOR2ID(getminor(bp
->b_edev
)));
1301 if (lsp
->ls_kstat
) {
1302 mutex_enter(lsp
->ls_kstat
->ks_lock
);
1303 kstat_waitq_to_runq(KSTAT_IO_PTR(lsp
->ls_kstat
));
1304 mutex_exit(lsp
->ls_kstat
->ks_lock
);
1307 mutex_enter(&lsp
->ls_vp_lock
);
1308 lsp
->ls_vp_iocount
++;
1309 mutex_exit(&lsp
->ls_vp_lock
);
1312 bufaddr
= bp
->b_un
.b_addr
;
1313 offset
= (bp
->b_lblkno
+ (diskaddr_t
)(uintptr_t)bp
->b_private
)
1314 << lsp
->ls_lbshift
; /* offset within file */
1315 if (lsp
->ls_crypto_enabled
) {
1316 /* encrypted data really begins after crypto header */
1317 offset
+= lsp
->ls_crypto_offset
;
1322 if (lsp
->ls_vp
== NULL
|| lsp
->ls_vp_closereq
) {
1328 * If we're writing and the buffer was not B_ASYNC
1329 * we'll follow up with a fop_fsync() to force any
1330 * asynchronous I/O to stable storage.
1332 if (!(bp
->b_flags
& B_READ
) && !(bp
->b_flags
& B_ASYNC
))
1336 * We used to always use vn_rdwr here, but we cannot do that because
1337 * we might decide to read or write from the the underlying
1338 * file during this call, which would be a deadlock because
1339 * we have the rw_lock. So instead we page, unless it's not
1340 * mapable or it's a character device or it's an encrypted lofi.
1342 if ((lsp
->ls_vp
->v_flag
& VNOMAP
) || (lsp
->ls_vp
->v_type
== VCHR
) ||
1343 lsp
->ls_crypto_enabled
) {
1344 error
= lofi_rdwr(bufaddr
, offset
, bp
, lsp
, len
, RDWR_RAW
,
1346 } else if (lsp
->ls_uncomp_seg_sz
== 0) {
1347 error
= lofi_mapped_rdwr(bufaddr
, offset
, bp
, lsp
);
1349 uchar_t
*compressed_seg
= NULL
, *cmpbuf
;
1350 uchar_t
*uncompressed_seg
= NULL
;
1351 lofi_compress_info_t
*li
;
1354 uint64_t sblkno
, eblkno
, cmpbytes
;
1355 uint64_t uncompressed_seg_index
;
1356 struct lofi_comp_cache
*lc
;
1357 offset_t sblkoff
, eblkoff
;
1358 uoff_t salign
, ealign
;
1360 uint32_t comp_data_sz
;
1365 * From here on we're dealing primarily with compressed files
1367 ASSERT(!lsp
->ls_crypto_enabled
);
1370 * Compressed files can only be read from and
1373 if (!(bp
->b_flags
& B_READ
)) {
1374 bp
->b_resid
= bp
->b_bcount
;
1379 ASSERT(lsp
->ls_comp_algorithm_index
>= 0);
1380 li
= &lofi_compress_table
[lsp
->ls_comp_algorithm_index
];
1382 * Compute starting and ending compressed segment numbers
1383 * We use only bitwise operations avoiding division and
1384 * modulus because we enforce the compression segment size
1387 sblkno
= offset
>> lsp
->ls_comp_seg_shift
;
1388 sblkoff
= offset
& (lsp
->ls_uncomp_seg_sz
- 1);
1389 eblkno
= (offset
+ bp
->b_bcount
) >> lsp
->ls_comp_seg_shift
;
1390 eblkoff
= (offset
+ bp
->b_bcount
) & (lsp
->ls_uncomp_seg_sz
- 1);
1393 * Check the decompressed segment cache.
1395 * The cache is used only when the requested data
1396 * is within a segment. Requests that cross
1397 * segment boundaries bypass the cache.
1399 if (sblkno
== eblkno
||
1400 (sblkno
+ 1 == eblkno
&& eblkoff
== 0)) {
1402 * Request doesn't cross a segment boundary,
1403 * now check the cache.
1405 mutex_enter(&lsp
->ls_comp_cache_lock
);
1406 lc
= lofi_find_comp_data(lsp
, sblkno
);
1409 * We've found the decompressed segment
1410 * data in the cache; reuse it.
1412 bcopy(lc
->lc_data
+ sblkoff
, bufaddr
,
1414 mutex_exit(&lsp
->ls_comp_cache_lock
);
1419 mutex_exit(&lsp
->ls_comp_cache_lock
);
1423 * Align start offset to block boundary for segmap
1425 salign
= lsp
->ls_comp_seg_index
[sblkno
];
1426 sdiff
= salign
& (DEV_BSIZE
- 1);
1428 if (eblkno
>= (lsp
->ls_comp_index_sz
- 1)) {
1430 * We're dealing with the last segment of
1431 * the compressed file -- the size of this
1432 * segment *may not* be the same as the
1433 * segment size for the file
1435 eblkoff
= (offset
+ bp
->b_bcount
) &
1436 (lsp
->ls_uncomp_last_seg_sz
- 1);
1437 ealign
= lsp
->ls_vp_comp_size
;
1439 ealign
= lsp
->ls_comp_seg_index
[eblkno
+ 1];
1443 * Preserve original request paramaters
1445 oblkcount
= bp
->b_bcount
;
1448 * Assign the calculated parameters
1450 comp_data_sz
= ealign
- salign
;
1451 bp
->b_bcount
= comp_data_sz
;
1454 * Buffers to hold compressed segments are pre-allocated
1455 * on a per-thread basis. Find a pre-allocated buffer
1456 * that is not currently in use and mark it for use.
1458 mutex_enter(&lsp
->ls_comp_bufs_lock
);
1459 for (j
= 0; j
< lofi_taskq_nthreads
; j
++) {
1460 if (lsp
->ls_comp_bufs
[j
].inuse
== 0) {
1461 lsp
->ls_comp_bufs
[j
].inuse
= 1;
1466 mutex_exit(&lsp
->ls_comp_bufs_lock
);
1467 ASSERT(j
< lofi_taskq_nthreads
);
1470 * If the pre-allocated buffer size does not match
1471 * the size of the I/O request, re-allocate it with
1472 * the appropriate size
1474 if (lsp
->ls_comp_bufs
[j
].bufsize
< bp
->b_bcount
) {
1475 if (lsp
->ls_comp_bufs
[j
].bufsize
> 0)
1476 kmem_free(lsp
->ls_comp_bufs
[j
].buf
,
1477 lsp
->ls_comp_bufs
[j
].bufsize
);
1478 lsp
->ls_comp_bufs
[j
].buf
= kmem_alloc(bp
->b_bcount
,
1480 lsp
->ls_comp_bufs
[j
].bufsize
= bp
->b_bcount
;
1482 compressed_seg
= lsp
->ls_comp_bufs
[j
].buf
;
1485 * Map in the calculated number of blocks
1487 error
= lofi_mapped_rdwr((caddr_t
)compressed_seg
, salign
,
1490 bp
->b_bcount
= oblkcount
;
1491 bp
->b_resid
= oblkcount
;
1496 * decompress compressed blocks start
1498 cmpbuf
= compressed_seg
+ sdiff
;
1499 for (i
= sblkno
; i
<= eblkno
; i
++) {
1500 ASSERT(i
< lsp
->ls_comp_index_sz
- 1);
1504 * The last segment is special in that it is
1505 * most likely not going to be the same
1506 * (uncompressed) size as the other segments.
1508 if (i
== (lsp
->ls_comp_index_sz
- 2)) {
1509 seglen
= lsp
->ls_uncomp_last_seg_sz
;
1511 seglen
= lsp
->ls_uncomp_seg_sz
;
1515 * Each of the segment index entries contains
1516 * the starting block number for that segment.
1517 * The number of compressed bytes in a segment
1518 * is thus the difference between the starting
1519 * block number of this segment and the starting
1520 * block number of the next segment.
1522 cmpbytes
= lsp
->ls_comp_seg_index
[i
+ 1] -
1523 lsp
->ls_comp_seg_index
[i
];
1526 * The first byte in a compressed segment is a flag
1527 * that indicates whether this segment is compressed
1530 * The variable 'useg' is used (instead of
1531 * uncompressed_seg) in this loop to keep a
1532 * reference to the uncompressed segment.
1534 * N.B. If 'useg' is replaced with uncompressed_seg,
1535 * it leads to memory leaks and heap corruption in
1536 * corner cases where compressed segments lie
1537 * adjacent to uncompressed segments.
1539 if (*cmpbuf
== UNCOMPRESSED
) {
1540 useg
= cmpbuf
+ SEGHDR
;
1542 if (uncompressed_seg
== NULL
)
1544 kmem_alloc(lsp
->ls_uncomp_seg_sz
,
1546 useg
= uncompressed_seg
;
1547 uncompressed_seg_index
= i
;
1549 if (li
->l_decompress((cmpbuf
+ SEGHDR
),
1550 (cmpbytes
- SEGHDR
), uncompressed_seg
,
1551 &seglen
, li
->l_level
) != 0) {
1558 * Determine how much uncompressed data we
1559 * have to copy and copy it
1561 xfersize
= lsp
->ls_uncomp_seg_sz
- sblkoff
;
1563 xfersize
-= (lsp
->ls_uncomp_seg_sz
- eblkoff
);
1565 bcopy((useg
+ sblkoff
), bufaddr
, xfersize
);
1568 bufaddr
+= xfersize
;
1569 bp
->b_resid
-= xfersize
;
1572 if (bp
->b_resid
== 0)
1574 } /* decompress compressed blocks ends */
1577 * Skip to done if there is no uncompressed data to cache
1579 if (uncompressed_seg
== NULL
)
1583 * Add the data for the last decompressed segment to
1586 * In case the uncompressed segment data was added to (and
1587 * is referenced by) the cache, make sure we don't free it
1590 mutex_enter(&lsp
->ls_comp_cache_lock
);
1591 if ((lc
= lofi_add_comp_data(lsp
, uncompressed_seg_index
,
1592 uncompressed_seg
)) != NULL
) {
1593 uncompressed_seg
= NULL
;
1595 mutex_exit(&lsp
->ls_comp_cache_lock
);
1598 if (compressed_seg
!= NULL
) {
1599 mutex_enter(&lsp
->ls_comp_bufs_lock
);
1600 lsp
->ls_comp_bufs
[j
].inuse
= 0;
1601 mutex_exit(&lsp
->ls_comp_bufs_lock
);
1603 if (uncompressed_seg
!= NULL
)
1604 kmem_free(uncompressed_seg
, lsp
->ls_uncomp_seg_sz
);
1605 } /* end of handling compressed files */
1607 if ((error
== 0) && (syncflag
!= 0))
1608 error
= fop_fsync(lsp
->ls_vp
, syncflag
, kcred
, NULL
);
1611 if (bufinited
&& lsp
->ls_kstat
) {
1612 size_t n_done
= bp
->b_bcount
- bp
->b_resid
;
1615 mutex_enter(lsp
->ls_kstat
->ks_lock
);
1616 kioptr
= KSTAT_IO_PTR(lsp
->ls_kstat
);
1617 if (bp
->b_flags
& B_READ
) {
1618 kioptr
->nread
+= n_done
;
1621 kioptr
->nwritten
+= n_done
;
1624 kstat_runq_exit(kioptr
);
1625 mutex_exit(lsp
->ls_kstat
->ks_lock
);
1628 mutex_enter(&lsp
->ls_vp_lock
);
1629 if (--lsp
->ls_vp_iocount
== 0)
1630 cv_broadcast(&lsp
->ls_vp_cv
);
1631 mutex_exit(&lsp
->ls_vp_lock
);
1633 bioerror(bp
, error
);
1638 lofi_strategy(struct buf
*bp
)
1640 struct lofi_state
*lsp
;
1648 * We cannot just do I/O here, because the current thread
1649 * _might_ end up back in here because the underlying filesystem
1650 * wants a buffer, which eventually gets into bio_recycle and
1651 * might call into lofi to write out a delayed-write buffer.
1652 * This is bad if the filesystem above lofi is the same as below.
1654 * We could come up with a complex strategy using threads to
1655 * do the I/O asynchronously, or we could use task queues. task
1656 * queues were incredibly easy so they win.
1659 lsp
= ddi_get_soft_state(lofi_statep
,
1660 LOFI_MINOR2ID(getminor(bp
->b_edev
)));
1661 part
= LOFI_PART(getminor(bp
->b_edev
));
1664 bioerror(bp
, ENXIO
);
1669 /* Check if we are closing. */
1670 mutex_enter(&lsp
->ls_vp_lock
);
1671 if (lsp
->ls_vp
== NULL
|| lsp
->ls_vp_closereq
) {
1672 mutex_exit(&lsp
->ls_vp_lock
);
1677 mutex_exit(&lsp
->ls_vp_lock
);
1679 shift
= lsp
->ls_lbshift
;
1681 p_nblks
= lsp
->ls_vp_size
>> shift
;
1683 if (lsp
->ls_cmlbhandle
!= NULL
) {
1684 if (cmlb_partinfo(lsp
->ls_cmlbhandle
, part
, &p_nblks
, &p_lba
,
1686 bioerror(bp
, ENXIO
);
1692 /* start block past partition end? */
1693 if (bp
->b_lblkno
> p_nblks
) {
1694 bioerror(bp
, ENXIO
);
1699 offset
= (bp
->b_lblkno
+p_lba
) << shift
; /* offset within file */
1701 mutex_enter(&lsp
->ls_vp_lock
);
1702 if (lsp
->ls_crypto_enabled
) {
1703 /* encrypted data really begins after crypto header */
1704 offset
+= lsp
->ls_crypto_offset
;
1707 /* make sure we will not pass the file or partition size */
1708 if (offset
== lsp
->ls_vp_size
||
1709 offset
== (((p_lba
+ p_nblks
) << shift
) + lsp
->ls_crypto_offset
)) {
1711 if ((bp
->b_flags
& B_READ
) != 0) {
1712 bp
->b_resid
= bp
->b_bcount
;
1715 /* writes should fail */
1716 bioerror(bp
, ENXIO
);
1719 mutex_exit(&lsp
->ls_vp_lock
);
1722 if ((offset
> lsp
->ls_vp_size
) ||
1723 (offset
> (((p_lba
+ p_nblks
) << shift
) + lsp
->ls_crypto_offset
)) ||
1724 ((offset
+ bp
->b_bcount
) > ((p_lba
+ p_nblks
) << shift
))) {
1725 bioerror(bp
, ENXIO
);
1727 mutex_exit(&lsp
->ls_vp_lock
);
1731 mutex_exit(&lsp
->ls_vp_lock
);
1733 if (lsp
->ls_kstat
) {
1734 mutex_enter(lsp
->ls_kstat
->ks_lock
);
1735 kstat_waitq_enter(KSTAT_IO_PTR(lsp
->ls_kstat
));
1736 mutex_exit(lsp
->ls_kstat
->ks_lock
);
1738 bp
->b_private
= (void *)(uintptr_t)p_lba
; /* partition start */
1739 (void) taskq_dispatch(lsp
->ls_taskq
, lofi_strategy_task
, bp
, KM_SLEEP
);
1745 lofi_read(dev_t dev
, struct uio
*uio
, struct cred
*credp
)
1747 if (getminor(dev
) == 0)
1750 return (physio(lofi_strategy
, NULL
, dev
, B_READ
, minphys
, uio
));
1755 lofi_write(dev_t dev
, struct uio
*uio
, struct cred
*credp
)
1757 if (getminor(dev
) == 0)
1760 return (physio(lofi_strategy
, NULL
, dev
, B_WRITE
, minphys
, uio
));
1765 lofi_aread(dev_t dev
, struct aio_req
*aio
, struct cred
*credp
)
1767 if (getminor(dev
) == 0)
1769 UIO_CHECK(aio
->aio_uio
);
1770 return (aphysio(lofi_strategy
, anocancel
, dev
, B_READ
, minphys
, aio
));
1775 lofi_awrite(dev_t dev
, struct aio_req
*aio
, struct cred
*credp
)
1777 if (getminor(dev
) == 0)
1779 UIO_CHECK(aio
->aio_uio
);
1780 return (aphysio(lofi_strategy
, anocancel
, dev
, B_WRITE
, minphys
, aio
));
1785 lofi_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
1787 struct lofi_state
*lsp
;
1788 dev_t dev
= (dev_t
)arg
;
1791 instance
= LOFI_MINOR2ID(getminor(dev
));
1793 case DDI_INFO_DEVT2DEVINFO
:
1794 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
1796 return (DDI_FAILURE
);
1797 *result
= lsp
->ls_dip
;
1798 return (DDI_SUCCESS
);
1799 case DDI_INFO_DEVT2INSTANCE
:
1800 *result
= (void *) (intptr_t)instance
;
1801 return (DDI_SUCCESS
);
1803 return (DDI_FAILURE
);
1807 lofi_create_minor_nodes(struct lofi_state
*lsp
, boolean_t labeled
)
1810 int instance
= ddi_get_instance(lsp
->ls_dip
);
1812 if (labeled
== B_TRUE
) {
1813 cmlb_alloc_handle(&lsp
->ls_cmlbhandle
);
1814 error
= cmlb_attach(lsp
->ls_dip
, &lofi_tg_ops
, DTYPE_DIRECT
,
1815 B_FALSE
, B_FALSE
, DDI_NT_BLOCK_CHAN
,
1816 CMLB_CREATE_P0_MINOR_NODE
, lsp
->ls_cmlbhandle
, (void *)1);
1818 if (error
!= DDI_SUCCESS
) {
1819 cmlb_free_handle(&lsp
->ls_cmlbhandle
);
1820 lsp
->ls_cmlbhandle
= NULL
;
1824 /* create minor nodes */
1825 error
= ddi_create_minor_node(lsp
->ls_dip
, LOFI_BLOCK_NODE
,
1826 S_IFBLK
, LOFI_ID2MINOR(instance
), DDI_PSEUDO
, 0);
1827 if (error
== DDI_SUCCESS
) {
1828 error
= ddi_create_minor_node(lsp
->ls_dip
,
1829 LOFI_CHAR_NODE
, S_IFCHR
, LOFI_ID2MINOR(instance
),
1831 if (error
!= DDI_SUCCESS
) {
1832 ddi_remove_minor_node(lsp
->ls_dip
,
1843 lofi_zone_bind(struct lofi_state
*lsp
)
1847 mutex_enter(&curproc
->p_lock
);
1848 if ((error
= rctl_incr_lofi(curproc
, curproc
->p_zone
, 1)) != 0) {
1849 mutex_exit(&curproc
->p_lock
);
1852 mutex_exit(&curproc
->p_lock
);
1854 if (ddi_prop_update_string(DDI_DEV_T_NONE
, lsp
->ls_dip
, ZONE_PROP_NAME
,
1855 (char *)curproc
->p_zone
->zone_name
) != DDI_PROP_SUCCESS
) {
1856 rctl_decr_lofi(curproc
->p_zone
, 1);
1859 zone_init_ref(&lsp
->ls_zone
);
1860 zone_hold_ref(curzone
, &lsp
->ls_zone
, ZONE_REF_LOFI
);
1866 lofi_zone_unbind(struct lofi_state
*lsp
)
1868 (void) ddi_prop_remove(DDI_DEV_T_NONE
, lsp
->ls_dip
, ZONE_PROP_NAME
);
1869 rctl_decr_lofi(curproc
->p_zone
, 1);
1870 zone_rele_ref(&lsp
->ls_zone
, ZONE_REF_LOFI
);
1874 lofi_online_dev(dev_info_t
*dip
)
1878 int instance
= ddi_get_instance(dip
);
1879 struct lofi_state
*lsp
;
1882 if (ddi_prop_exists(DDI_DEV_T_ANY
, dip
, DDI_PROP_DONTPASS
, "labeled"))
1885 /* lsp alloc+init, soft state is freed in lofi_detach */
1886 error
= ddi_soft_state_zalloc(lofi_statep
, instance
);
1887 if (error
== DDI_FAILURE
) {
1891 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
1894 if ((error
= lofi_zone_bind(lsp
)) != 0)
1897 cv_init(&lsp
->ls_vp_cv
, NULL
, CV_DRIVER
, NULL
);
1898 mutex_init(&lsp
->ls_comp_cache_lock
, NULL
, MUTEX_DRIVER
, NULL
);
1899 mutex_init(&lsp
->ls_comp_bufs_lock
, NULL
, MUTEX_DRIVER
, NULL
);
1900 mutex_init(&lsp
->ls_kstat_lock
, NULL
, MUTEX_DRIVER
, NULL
);
1901 mutex_init(&lsp
->ls_vp_lock
, NULL
, MUTEX_DRIVER
, NULL
);
1903 if ((error
= lofi_create_minor_nodes(lsp
, labeled
)) != 0) {
1904 lofi_zone_unbind(lsp
);
1908 /* driver handles kernel-issued IOCTLs */
1909 if (ddi_prop_create(DDI_DEV_T_NONE
, dip
, DDI_PROP_CANSLEEP
,
1910 DDI_KERNEL_IOCTL
, NULL
, 0) != DDI_PROP_SUCCESS
) {
1911 error
= DDI_FAILURE
;
1915 lsp
->ls_kstat
= kstat_create_zone(LOFI_DRIVER_NAME
, instance
,
1916 NULL
, "disk", KSTAT_TYPE_IO
, 1, 0, getzoneid());
1917 if (lsp
->ls_kstat
== NULL
) {
1918 (void) ddi_prop_remove(DDI_DEV_T_NONE
, lsp
->ls_dip
,
1924 lsp
->ls_kstat
->ks_lock
= &lsp
->ls_kstat_lock
;
1925 kstat_zone_add(lsp
->ls_kstat
, GLOBAL_ZONEID
);
1926 kstat_install(lsp
->ls_kstat
);
1927 return (DDI_SUCCESS
);
1929 if (lsp
->ls_cmlbhandle
!= NULL
) {
1930 cmlb_detach(lsp
->ls_cmlbhandle
, 0);
1931 cmlb_free_handle(&lsp
->ls_cmlbhandle
);
1933 ddi_remove_minor_node(dip
, NULL
);
1934 lofi_zone_unbind(lsp
);
1936 mutex_destroy(&lsp
->ls_comp_cache_lock
);
1937 mutex_destroy(&lsp
->ls_comp_bufs_lock
);
1938 mutex_destroy(&lsp
->ls_kstat_lock
);
1939 mutex_destroy(&lsp
->ls_vp_lock
);
1940 cv_destroy(&lsp
->ls_vp_cv
);
1942 ddi_soft_state_free(lofi_statep
, instance
);
1947 lofi_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
1950 int instance
= ddi_get_instance(dip
);
1951 struct lofi_state
*lsp
;
1953 if (cmd
!= DDI_ATTACH
)
1954 return (DDI_FAILURE
);
1957 * Instance 0 is control instance, attaching control instance
1958 * will set the lofi up and ready.
1960 if (instance
== 0) {
1961 rv
= ddi_soft_state_zalloc(lofi_statep
, 0);
1962 if (rv
== DDI_FAILURE
) {
1963 return (DDI_FAILURE
);
1965 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
1966 rv
= ddi_create_minor_node(dip
, LOFI_CTL_NODE
, S_IFCHR
, 0,
1968 if (rv
== DDI_FAILURE
) {
1969 ddi_soft_state_free(lofi_statep
, 0);
1970 return (DDI_FAILURE
);
1972 /* driver handles kernel-issued IOCTLs */
1973 if (ddi_prop_create(DDI_DEV_T_NONE
, dip
, DDI_PROP_CANSLEEP
,
1974 DDI_KERNEL_IOCTL
, NULL
, 0) != DDI_PROP_SUCCESS
) {
1975 ddi_remove_minor_node(dip
, NULL
);
1976 ddi_soft_state_free(lofi_statep
, 0);
1977 return (DDI_FAILURE
);
1980 zone_key_create(&lofi_zone_key
, NULL
, lofi_zone_shutdown
, NULL
);
1984 if (lofi_online_dev(dip
) == DDI_FAILURE
)
1985 return (DDI_FAILURE
);
1988 ddi_report_dev(dip
);
1989 return (DDI_SUCCESS
);
1993 lofi_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
1995 struct lofi_state
*lsp
;
1996 int instance
= ddi_get_instance(dip
);
1998 if (cmd
!= DDI_DETACH
)
1999 return (DDI_FAILURE
);
2002 * If the instance is not 0, release state.
2003 * The instance 0 is control device, we can not detach it
2004 * before other instances are detached.
2006 if (instance
!= 0) {
2007 lsp
= ddi_get_soft_state(lofi_statep
, instance
);
2008 if (lsp
!= NULL
&& lsp
->ls_vp_ready
== B_FALSE
) {
2009 ddi_soft_state_free(lofi_statep
, instance
);
2010 return (DDI_SUCCESS
);
2012 return (DDI_FAILURE
);
2014 mutex_enter(&lofi_lock
);
2016 if (!list_is_empty(&lofi_list
)) {
2017 mutex_exit(&lofi_lock
);
2018 return (DDI_FAILURE
);
2021 ddi_remove_minor_node(dip
, NULL
);
2022 ddi_prop_remove_all(dip
);
2024 mutex_exit(&lofi_lock
);
2026 if (zone_key_delete(lofi_zone_key
) != 0)
2027 cmn_err(CE_WARN
, "failed to delete zone key");
2029 ddi_soft_state_free(lofi_statep
, 0);
2031 return (DDI_SUCCESS
);
2035 * With the addition of encryption, we must be careful that encryption key is
2036 * wiped before kernel's data structures are freed so it cannot accidentally
2037 * slip out to userland through uninitialized data elsewhere.
2040 free_lofi_ioctl(struct lofi_ioctl
*klip
)
2042 /* Make sure this encryption key doesn't stick around */
2043 bzero(klip
->li_key
, sizeof (klip
->li_key
));
2044 kmem_free(klip
, sizeof (struct lofi_ioctl
));
2048 * These two functions simplify the rest of the ioctls that need to copyin/out
2049 * the lofi_ioctl structure.
2052 copy_in_lofi_ioctl(const struct lofi_ioctl
*ulip
, struct lofi_ioctl
**klipp
,
2055 struct lofi_ioctl
*klip
;
2058 klip
= *klipp
= kmem_alloc(sizeof (struct lofi_ioctl
), KM_SLEEP
);
2059 error
= ddi_copyin(ulip
, klip
, sizeof (struct lofi_ioctl
), flag
);
2063 /* ensure NULL termination */
2064 klip
->li_filename
[MAXPATHLEN
-1] = '\0';
2065 klip
->li_devpath
[MAXPATHLEN
-1] = '\0';
2066 klip
->li_algorithm
[MAXALGLEN
-1] = '\0';
2067 klip
->li_cipher
[CRYPTO_MAX_MECH_NAME
-1] = '\0';
2068 klip
->li_iv_cipher
[CRYPTO_MAX_MECH_NAME
-1] = '\0';
2070 if (klip
->li_id
> L_MAXMIN32
) {
2078 free_lofi_ioctl(klip
);
2083 copy_out_lofi_ioctl(const struct lofi_ioctl
*klip
, struct lofi_ioctl
*ulip
,
2089 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
2090 * This ensures that an attacker can't trivially find the
2091 * key for a mapping just by issuing the ioctl.
2093 * It can still be found by poking around in kmem with mdb(1),
2094 * but there is no point in making it easy when the info isn't
2095 * of any use in this direction anyway.
2097 * Either way we don't actually have the raw key stored in
2098 * a form that we can get it anyway, since we just used it
2099 * to create a ctx template and didn't keep "the original".
2101 error
= ddi_copyout(klip
, ulip
, sizeof (struct lofi_ioctl
), flag
);
2108 lofi_access(struct lofi_state
*lsp
)
2110 ASSERT(MUTEX_HELD(&lofi_lock
));
2111 if (INGLOBALZONE(curproc
) || lsp
->ls_zone
.zref_zone
== curzone
)
2117 * Find the lofi state for the given filename. We compare by vnode to
2118 * allow the global zone visibility into NGZ lofi nodes.
2121 file_to_lofi_nocheck(char *filename
, boolean_t readonly
,
2122 struct lofi_state
**lspp
)
2124 struct lofi_state
*lsp
;
2129 ASSERT(MUTEX_HELD(&lofi_lock
));
2131 if ((err
= lookupname(filename
, UIO_SYSSPACE
, FOLLOW
,
2132 NULLVPP
, &vp
)) != 0)
2135 if (vp
->v_type
== VREG
) {
2137 if (fop_realvp(vp
, &realvp
, NULL
) == 0) {
2144 for (lsp
= list_head(&lofi_list
); lsp
!= NULL
;
2145 lsp
= list_next(&lofi_list
, lsp
)) {
2146 if (lsp
->ls_vp
== vp
) {
2149 if (lsp
->ls_readonly
) {
2151 /* Skip if '-r' is specified */
2162 * If a filename is given as an argument for lofi_unmap, we shouldn't
2163 * allow unmap if there are multiple read-only lofi devices associated
2169 else if (rdfiles
> 1)
2180 * Find the minor for the given filename, checking the zone can access
2184 file_to_lofi(char *filename
, boolean_t readonly
, struct lofi_state
**lspp
)
2188 ASSERT(MUTEX_HELD(&lofi_lock
));
2190 if ((err
= file_to_lofi_nocheck(filename
, readonly
, lspp
)) != 0)
2193 if ((err
= lofi_access(*lspp
)) != 0)
2200 * Fakes up a disk geometry based on the size of the file. This is needed
2201 * to support newfs on traditional lofi device, but also will provide
2202 * geometry hint for cmlb.
2205 fake_disk_geometry(struct lofi_state
*lsp
)
2207 uoff_t dsize
= lsp
->ls_vp_size
- lsp
->ls_crypto_offset
;
2209 /* dk_geom - see dkio(7I) */
2211 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
2212 * of sectors), but that breaks programs like fdisk which want to
2213 * partition a disk by cylinder. With one cylinder, you can't create
2214 * an fdisk partition and put pcfs on it for testing (hard to pick
2215 * a number between one and one).
2217 * The cheezy floppy test is an attempt to not have too few cylinders
2218 * for a small file, or so many on a big file that you waste space
2219 * for backup superblocks or cylinder group structures.
2221 bzero(&lsp
->ls_dkg
, sizeof (lsp
->ls_dkg
));
2222 if (dsize
< (2 * 1024 * 1024)) /* floppy? */
2223 lsp
->ls_dkg
.dkg_ncyl
= dsize
/ (100 * 1024);
2225 lsp
->ls_dkg
.dkg_ncyl
= dsize
/ (300 * 1024);
2226 /* in case file file is < 100k */
2227 if (lsp
->ls_dkg
.dkg_ncyl
== 0)
2228 lsp
->ls_dkg
.dkg_ncyl
= 1;
2230 lsp
->ls_dkg
.dkg_pcyl
= lsp
->ls_dkg
.dkg_ncyl
;
2231 lsp
->ls_dkg
.dkg_nhead
= 1;
2232 lsp
->ls_dkg
.dkg_rpm
= 7200;
2234 lsp
->ls_dkg
.dkg_nsect
= dsize
/
2235 (lsp
->ls_dkg
.dkg_ncyl
<< lsp
->ls_pbshift
);
/*
 * build vtoc - see dkio(7I)
 *
 * Fakes one big partition based on the size of the file. This is needed
 * because we allow newfs'ing the traditional lofi device and newfs will
 * do several disk ioctls to figure out the geometry and partition
 * information. It uses that information to determine the parameters to
 * pass to mkfs.
 */
static void
fake_disk_vtoc(struct lofi_state *lsp, struct vtoc *vt)
{
	bzero(vt, sizeof (struct vtoc));
	vt->v_sanity = VTOC_SANE;
	vt->v_version = V_VERSION;
	(void) strncpy(vt->v_volume, LOFI_DRIVER_NAME,
	    sizeof (vt->v_volume));
	vt->v_sectorsz = 1 << lsp->ls_pbshift;
	vt->v_nparts = 1;
	vt->v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only, other files can
	 * be read-write.
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		vt->v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		vt->v_part[0].p_flag = V_UNMNT;
	}
	vt->v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	vt->v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
}
/*
 * build dk_cinfo - see dkio(7I)
 */
static void
fake_disk_info(dev_t dev, struct dk_cinfo *ci)
{
	bzero(ci, sizeof (struct dk_cinfo));
	(void) strlcpy(ci->dki_cname, LOFI_DRIVER_NAME, sizeof (ci->dki_cname));
	ci->dki_ctype = DKC_SCSI_CCS;
	(void) strlcpy(ci->dki_dname, LOFI_DRIVER_NAME, sizeof (ci->dki_dname));
	ci->dki_unit = LOFI_MINOR2ID(getminor(dev));
	ci->dki_partition = LOFI_PART(getminor(dev));
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * it can't be unsigned.
	 */
	ci->dki_maxtransfer = 16;
}
/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
 */
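/*
 * Illustrative view of that on-disk layout (not a struct the driver
 * defines; field widths follow the lofi_state members the parser below
 * fills in, and all integers are big-endian):
 *
 *	char		signature[sizeof (lsp->ls_comp_algorithm)];
 *	uint32_t	uncomp_seg_sz;
 *	uint32_t	comp_index_sz;		number of index entries
 *	uint32_t	uncomp_last_seg_sz;
 *	uint64_t	seg_index[comp_index_sz];	segment offsets
 */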
static int
lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
{
	uint32_t index_sz, header_len, i;
	ssize_t	resid;
	enum uio_rw rw;
	char *tbuf = buf;
	int error;

	/* The signature has already been read */
	tbuf += sizeof (lsp->ls_comp_algorithm);
	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);

	/*
	 * The compressed segment size must be a power of 2
	 */
	if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
	    !ISP2(lsp->ls_uncomp_seg_sz))
		return (EINVAL);

	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
		;

	lsp->ls_comp_seg_shift = i;

	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);

	tbuf += sizeof (lsp->ls_comp_index_sz);
	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
	    sizeof (lsp->ls_uncomp_last_seg_sz));
	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);

	/*
	 * Compute the total size of the uncompressed data
	 * for use in fake_disk_geometry and other calculations.
	 * Disk geometry has to be faked with respect to the
	 * actual uncompressed data size rather than the
	 * compressed file size.
	 */
	lsp->ls_vp_size =
	    (uoff_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
	    + lsp->ls_uncomp_last_seg_sz;

	/*
	 * Index size is rounded up to DEV_BSIZE for ease
	 * of segmapping
	 */
	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
	header_len = sizeof (lsp->ls_comp_algorithm) +
	    sizeof (lsp->ls_uncomp_seg_sz) +
	    sizeof (lsp->ls_comp_index_sz) +
	    sizeof (lsp->ls_uncomp_last_seg_sz);
	lsp->ls_comp_offbase = header_len + index_sz;

	index_sz += header_len;
	index_sz = roundup(index_sz, DEV_BSIZE);

	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
	lsp->ls_comp_index_data_sz = index_sz;

	/*
	 * Read in the index -- this has a side-effect
	 * of reading in the header as well
	 */
	rw = UIO_READ;
	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* Skip the header, this is where the index really begins */
	lsp->ls_comp_seg_index =
	    (uint64_t *)(lsp->ls_comp_index_data + header_len);

	/*
	 * Now recompute offsets in the index to account for
	 * the header length
	 */
	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
		    BE_64(lsp->ls_comp_seg_index[i]);
	}

	return (error);
}
static int
lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip)
{
	struct crypto_meta chead;
	char buf[DEV_BSIZE];
	ssize_t	resid;
	char *marker;
	int error;
	int ret;
	int i;

	if (!klip->li_crypto_enabled)
		return (0);

	/*
	 * All current algorithms have a max of 448 bits.
	 */
	if (klip->li_iv_len > CRYPTO_BITS2BYTES(512))
		return (EINVAL);

	if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key))
		return (EINVAL);

	lsp->ls_crypto_enabled = klip->li_crypto_enabled;

	mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

	lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
	if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid cipher %s requested for %s",
		    klip->li_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* this is just initialization here */
	lsp->ls_mech.cm_param = NULL;
	lsp->ls_mech.cm_param_len = 0;

	lsp->ls_iv_type = klip->li_iv_type;
	lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
	if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid iv cipher %s requested"
		    " for %s", klip->li_iv_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* iv mech must itself take a null iv */
	lsp->ls_iv_mech.cm_param = NULL;
	lsp->ls_iv_mech.cm_param_len = 0;
	lsp->ls_iv_len = klip->li_iv_len;

	/*
	 * Create ctx using li_cipher & the raw li_key after checking
	 * that it isn't a weak key.
	 */
	lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
	lsp->ls_key.ck_length = klip->li_key_len;
	lsp->ls_key.ck_data = kmem_alloc(
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
	bcopy(klip->li_key, lsp->ls_key.ck_data,
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));

	ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "weak key check failed for cipher "
		    "%s on file %s (0x%x)", klip->li_cipher,
		    klip->li_filename, ret);
		return (EINVAL);
	}

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/*
	 * This is the case where the header in the lofi image is already
	 * initialized to indicate it is encrypted.
	 */
	if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) {
		/*
		 * The encryption header information is laid out this way:
		 *	6 bytes:	hex "CFLOFI"
		 *	2 bytes:	version = 0 ... for now
		 *	96 bytes:	reserved1 (not implemented yet)
		 *	4 bytes:	data_sector = 2 ... for now
		 *	more...		not implemented yet
		 */
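		/*
		 * Equivalent byte offsets within the DEV_BSIZE block read
		 * from CRYOFF (derived from the field sizes above;
		 * illustrative only):
		 *
		 *	0	magic[6]	"CFLOFI"
		 *	6	version		uint16_t, big-endian
		 *	8	reserved1[96]
		 *	104	data_sector	uint32_t, big-endian
		 */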
		marker = buf;

		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		return (0);
	}

	/*
	 * We've requested encryption, but no magic was found, so it must be
	 * a new image.
	 */

	/* the header area must still be zeroed before we initialize it */
	for (i = 0; i < sizeof (struct crypto_meta); i++) {
		if (buf[i] != '\0')
			return (EINVAL);
	}

	marker = buf;
	bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
	marker += sizeof (lofi_crypto_magic);
	chead.version = htons(LOFI_CRYPTO_VERSION);
	bcopy(&(chead.version), marker, sizeof (chead.version));
	marker += sizeof (chead.version);
	marker += sizeof (chead.reserved1);
	chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
	bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

	/* write the header */
	error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* fix things up so it looks like we read this info */
	bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
	    sizeof (lofi_crypto_magic));
	lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
	lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
	lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
	return (0);
}
/*
 * Check to see if the passed in signature is a valid one. If it is
 * valid, return the index into lofi_compress_table.
 *
 * Return -1 if it is invalid
 */
static int
lofi_compress_select(const char *signature)
{
	int i;

	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
			return (i);
	}
	return (-1);
}
static int
lofi_init_compress(struct lofi_state *lsp)
{
	char buf[DEV_BSIZE];
	int compress_index;
	ssize_t	resid;
	int error;

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
	    0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	if ((compress_index = lofi_compress_select(buf)) == -1)
		return (0);

	/* compression and encryption are mutually exclusive */
	if (lsp->ls_crypto_enabled)
		return (ENOTSUP);

	/* initialize compression info for compressed lofi */
	lsp->ls_comp_algorithm_index = compress_index;
	(void) strlcpy(lsp->ls_comp_algorithm,
	    lofi_compress_table[compress_index].l_name,
	    sizeof (lsp->ls_comp_algorithm));

	/* Finally setup per-thread pre-allocated buffers */
	lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads *
	    sizeof (struct compbuf), KM_SLEEP);

	return (lofi_map_compressed_file(lsp, buf));
}
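
/*
 * For example (illustrative): an image created with "lofiadm -C gzip"
 * starts with the algorithm name ("gzip") as its signature, so the
 * single DEV_BSIZE read above both identifies the algorithm via
 * lofi_compress_select() and supplies the header bytes that
 * lofi_map_compressed_file() goes on to parse.
 */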
/*
 * Allocate new or proposed id from lofi_id.
 *
 * Special cases for proposed id:
 * 0: not allowed, 0 is id for control device.
 * -1: allocate first usable id from lofi_id.
 * any other value is proposed value from userland.
 *
 * returns DDI_SUCCESS or errno.
 */
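/*
 * Example outcomes (illustrative): lofiadm normally proposes -1 and is
 * handed the lowest free id; proposing a specific id returns EEXIST if
 * that id's soft state already exists, ERANGE if it is beyond the
 * minor-number space, and EINVAL for 0.
 */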
static int
lofi_alloc_id(int *idp)
{
	int id, error = DDI_SUCCESS;

	if (*idp == -1) {
		id = id_allocff_nosleep(lofi_id);
		if (id == -1) {
			error = EAGAIN;
			goto err;
		}
	} else if (*idp == 0) {
		error = EINVAL;
		goto err;
	} else if (*idp > ((1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)) - 1)) {
		error = ERANGE;
		goto err;
	} else {
		if (ddi_get_soft_state(lofi_statep, *idp) != NULL) {
			error = EEXIST;
			goto err;
		}

		id = id_alloc_specific_nosleep(lofi_id, *idp);
		if (id == -1) {
			error = EAGAIN;
			goto err;
		}
	}
	*idp = id;
err:
	return (error);
}
static int
lofi_create_dev(struct lofi_ioctl *klip)
{
	dev_info_t *parent, *child;
	struct lofi_state *lsp = NULL;
	char namebuf[MAXNAMELEN];
	int error, circ;

	/* get control device */
	lsp = ddi_get_soft_state(lofi_statep, 0);
	parent = ddi_get_parent(lsp->ls_dip);

	if ((error = lofi_alloc_id((int *)&klip->li_id)))
		return (error);

	(void) snprintf(namebuf, sizeof (namebuf), LOFI_DRIVER_NAME "@%d",
	    klip->li_id);

	ndi_devi_enter(parent, &circ);
	child = ndi_devi_findchild(parent, namebuf);
	ndi_devi_exit(parent, circ);

	if (child == NULL) {
		child = ddi_add_child(parent, LOFI_DRIVER_NAME,
		    (pnode_t)DEVI_SID_NODEID, klip->li_id);
		if ((error = ddi_prop_update_int(DDI_DEV_T_NONE, child,
		    "instance", klip->li_id)) != DDI_PROP_SUCCESS)
			goto err;

		if (klip->li_labeled == B_TRUE) {
			if ((error = ddi_prop_create(DDI_DEV_T_NONE, child,
			    DDI_PROP_CANSLEEP, "labeled", 0, 0))
			    != DDI_PROP_SUCCESS)
				goto err;
		}

		if ((error = ndi_devi_online(child, NDI_ONLINE_ATTACH))
		    != NDI_SUCCESS)
			goto err;
	} else {
		id_free(lofi_id, klip->li_id);
		return (EEXIST);
	}

	return (error);

err:
	ddi_prop_remove_all(child);
	(void) ndi_devi_offline(child, NDI_DEVI_REMOVE);
	id_free(lofi_id, klip->li_id);
	return (error);
}
static void
lofi_create_inquiry(struct lofi_state *lsp, struct scsi_inquiry *inq)
{
	char *p = NULL;

	(void) strlcpy(inq->inq_vid, LOFI_DRIVER_NAME, sizeof (inq->inq_vid));

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp != NULL)
		p = strrchr(lsp->ls_vp->v_path, '/');
	if (p != NULL)
		(void) strncpy(inq->inq_pid, p + 1, sizeof (inq->inq_pid));
	mutex_exit(&lsp->ls_vp_lock);
	(void) strlcpy(inq->inq_revision, "1.0", sizeof (inq->inq_revision));
}
/*
 * copy devlink name from event cache
 */
static void
lofi_copy_devpath(struct lofi_ioctl *klip)
{
	int error;
	char namebuf[MAXNAMELEN], *str;
	clock_t ticks;
	nvlist_t *nvl = NULL;

	if (klip->li_labeled == B_TRUE)
		klip->li_devpath[0] = '\0';
	else {
		/* no need to wait for messages */
		(void) snprintf(klip->li_devpath, sizeof (klip->li_devpath),
		    "/dev/" LOFI_CHAR_NAME "/%d", klip->li_id);
		return;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d", klip->li_id);
	ticks = ddi_get_lbolt() + LOFI_TIMEOUT * drv_usectohz(1000000);

	mutex_enter(&lofi_devlink_cache.ln_lock);
	error = nvlist_lookup_nvlist(lofi_devlink_cache.ln_data, namebuf, &nvl);
	while (error != 0) {
		error = cv_timedwait(&lofi_devlink_cache.ln_cv,
		    &lofi_devlink_cache.ln_lock, ticks);
		if (error == -1)
			break;
		error = nvlist_lookup_nvlist(lofi_devlink_cache.ln_data,
		    namebuf, &nvl);
	}

	if (nvl != NULL) {
		if (nvlist_lookup_string(nvl, DEV_NAME, &str) == 0) {
			(void) strlcpy(klip->li_devpath, str,
			    sizeof (klip->li_devpath));
		}
	}
	mutex_exit(&lofi_devlink_cache.ln_lock);
}
/*
 * map a file to a minor number. Return the minor number.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	int id = -1;
	struct lofi_state *lsp = NULL;
	struct lofi_ioctl *klip;
	int error;
	struct vnode *vp = NULL;
	vattr_t vattr;
	int flag;
	char namebuf[MAXNAMELEN];

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	mutex_enter(&lofi_lock);

	if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly,
	    NULL) == 0) {
		error = EBUSY;
		goto err;
	}

	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error)
			goto err;
	}

	if (!V_ISLOFIABLE(vp->v_type)) {
		error = EINVAL;
		goto err;
	}

	vattr.va_mask = AT_SIZE;
	error = fop_getattr(vp, &vattr, 0, credp, NULL);
	if (error)
		goto err;

	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto err;
	}

	if (pickminor)
		klip->li_id = (uint32_t)-1;

	if ((error = lofi_create_dev(klip)) != 0)
		goto err;

	id = klip->li_id;
	lsp = ddi_get_soft_state(lofi_statep, id);

	/*
	 * from this point lofi_destroy() is used to clean up on error
	 * make sure the basic data is set
	 */
	list_insert_tail(&lofi_list, lsp);
	lsp->ls_dev = makedevice(getmajor(dev), LOFI_ID2MINOR(id));

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;

	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (fop_realvp(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	lsp->ls_lbshift = highbit(DEV_BSIZE) - 1;
	lsp->ls_pbshift = lsp->ls_lbshift;

	lsp->ls_readonly = klip->li_readonly;
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, id);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	/* For unlabeled lofi add Nblocks and Size */
	if (klip->li_labeled == B_FALSE) {
		error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
		    SIZE_PROP_NAME, lsp->ls_vp_size - lsp->ls_crypto_offset);
		if (error != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto err;
		}

		error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
		    NBLOCKS_PROP_NAME,
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE);
		if (error != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto err;
		}
	}

	/*
	 * Notify we are ready to rock.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_ready = B_TRUE;
	cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);
	mutex_exit(&lofi_lock);

	lofi_copy_devpath(klip);

	if (rvalp)
		*rvalp = id;
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

err:
	if (lsp != NULL) {
		lofi_destroy(lsp, credp);
	} else if (vp != NULL) {
		(void) fop_putpage(vp, 0, 0, B_INVAL, credp, NULL);
		(void) fop_close(vp, flag, 1, 0, credp, NULL);
		VN_RELE(vp);
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}
/*
 * unmap a file.
 */
static int
lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int err;

	err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (err != 0)
		return (err);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		if ((err = file_to_lofi(klip->li_filename, klip->li_readonly,
		    &lsp)) != 0)
			goto done;
	} else if (klip->li_id == 0) {
		err = ENXIO;
		goto done;
	} else {
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
	}

	if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
		err = ENXIO;
		goto done;
	}

	klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE. When the device is last closed, the
	 * device will be cleaned up appropriately.
	 *
	 * This is complicated by the fact that we may have outstanding
	 * dispatched I/Os. Rather than having a single mutex to serialize all
	 * I/O, we keep a count of the number of outstanding I/O requests
	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
	 * should be dispatched (ls_vp_closereq).
	 *
	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
	 * and then close the underlying vnode.
	 */
	if (is_opened(lsp)) {
		if (klip->li_force) {
			/* Mark the device for cleanup. */
			lofi_set_cleanup(lsp);
			mutex_enter(&lsp->ls_vp_lock);
			lsp->ls_vp_closereq = B_TRUE;
			/* Wake up any threads waiting on dkiocstate. */
			cv_broadcast(&lsp->ls_vp_cv);
			while (lsp->ls_vp_iocount > 0)
				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
			mutex_exit(&lsp->ls_vp_lock);
		} else if (klip->li_cleanup) {
			lofi_set_cleanup(lsp);
		} else {
			err = EBUSY;
		}
		goto done;
	}

	lofi_destroy(lsp, credp);

done:
	mutex_exit(&lofi_lock);

	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (err);
}
/*
 * get the filename given the minor number, or the minor number given
 * the name.
 */
static int
lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_ioctl *klip;
	struct lofi_state *lsp;
	int error;

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	switch (which) {
	case LOFI_GET_FILENAME:
		if (klip->li_id == 0) {
			free_lofi_ioctl(klip);
			return (EINVAL);
		}

		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
		if (lsp == NULL || lofi_access(lsp) != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}

		/*
		 * This may fail if, for example, we're trying to look
		 * up a zoned NFS path from the global zone.
		 */
		if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename,
		    sizeof (klip->li_filename), CRED()) != 0) {
			(void) strlcpy(klip->li_filename, "?",
			    sizeof (klip->li_filename));
		}

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;

		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_GET_MINOR:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}
		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);

		free_lofi_ioctl(klip);
		return (error);
	case LOFI_CHECK_COMPRESSED:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}

		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));

		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	default:
		free_lofi_ioctl(klip);
		return (EINVAL);
	}
}
static int
uscsi_is_inquiry(intptr_t arg, int flag, union scsi_cdb *cdb,
    struct uscsi_cmd *uscmd)
{
	int err = 0;

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct uscsi_cmd32 ucmd32;

		if (ddi_copyin((void *)arg, &ucmd32, sizeof (ucmd32), flag)) {
			err = EFAULT;
			goto err;
		}
		uscsi_cmd32touscsi_cmd((&ucmd32), uscmd);
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
			err = EFAULT;
			goto err;
		}
		break;
	default:
		err = EFAULT;
		goto err;
	}
#else
	if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
		err = EFAULT;
		goto err;
	}
#endif	/* _MULTI_DATAMODEL */
	if (ddi_copyin(uscmd->uscsi_cdb, cdb, uscmd->uscsi_cdblen, flag)) {
		err = EFAULT;
		goto err;
	}

	if (cdb->scc_cmd == SCMD_INQUIRY) {
		return (0);
	}

	err = EINVAL;
err:
	return (err);
}
static int
lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int error;
	enum dkio_state dkstate;
	struct lofi_state *lsp;
	int id;

	id = LOFI_MINOR2ID(getminor(dev));

	/* lofi ioctls only apply to the master device */
	if (id == 0) {
		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;

		/*
		 * the query commands only need read-access - i.e., normal
		 * users are allowed to do those on the ctl device as
		 * long as they can open it read-only.
		 */
		switch (cmd) {
		case LOFI_MAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
		case LOFI_MAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
		case LOFI_UNMAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 1, credp, flag));
		case LOFI_UNMAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 0, credp, flag));
		case LOFI_GET_FILENAME:
			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
			    credp, flag));
		case LOFI_GET_MINOR:
			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
			    credp, flag));

		/*
		 * This API made limited sense when this value was fixed
		 * at LOFI_MAX_FILES. However, its use to iterate
		 * across all possible devices in lofiadm means we don't
		 * want to return L_MAXMIN, but the highest
		 * *allocated* id.
		 */
		case LOFI_GET_MAXMINOR:
			id = 0;

			mutex_enter(&lofi_lock);

			for (lsp = list_head(&lofi_list); lsp != NULL;
			    lsp = list_next(&lofi_list, lsp)) {
				int i;

				if (lofi_access(lsp) != 0)
					continue;

				i = ddi_get_instance(lsp->ls_dip);
				if (i > id)
					id = i;
			}

			mutex_exit(&lofi_lock);

			error = ddi_copyout(&id, &lip->li_id,
			    sizeof (lip->li_id), flag);
			if (error)
				return (EFAULT);
			return (0);

		case LOFI_CHECK_COMPRESSED:
			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
			    credp, flag));
		default:
			return (EINVAL);
		}
	}
	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL || lsp->ls_cleanup) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}
	mutex_exit(&lofi_lock);

	if (ddi_prop_exists(DDI_DEV_T_ANY, lsp->ls_dip, DDI_PROP_DONTPASS,
	    "labeled") == 1) {
		error = cmlb_ioctl(lsp->ls_cmlbhandle, dev, cmd, arg, flag,
		    credp, rvalp, 0);
		if (error != ENOTTY)
			return (error);
	}

	/*
	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail
	 * with EIO as if the device was no longer present.
	 */
	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
		return (EIO);
	/* these are for faking out utilities like newfs */
	switch (cmd) {
	case DKIOCGMEDIAINFO:
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext media_info;
		int shift = lsp->ls_lbshift;
		int size;

		if (cmd == DKIOCGMEDIAINFOEXT) {
			media_info.dki_pbsize = 1U << lsp->ls_pbshift;
			size = sizeof (struct dk_minfo_ext);
		} else {
			size = sizeof (struct dk_minfo);
		}

		media_info.dki_media_type = DK_FIXED_DISK;
		media_info.dki_lbsize = 1U << shift;
		media_info.dki_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> shift;

		if (ddi_copyout(&media_info, (void *)arg, size, flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i = 0;

		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCGVTOC: {
		struct vtoc vt;

		fake_disk_vtoc(lsp, &vt);

		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			vtoctovtoc32(vt, vtoc32);
			if (ddi_copyout(&vtoc32, (void *)arg,
			    sizeof (struct vtoc32), flag))
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&vt, (void *)arg,
			    sizeof (struct vtoc), flag))
				return (EFAULT);
			break;
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo ci;

		fake_disk_info(dev, &ci);
		if (ddi_copyout(&ci, (void *)arg, sizeof (ci), flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
		    sizeof (struct dk_geom), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCSTATE:
		/*
		 * Normally, lofi devices are always in the INSERTED state. If
		 * a device is forcefully unmapped, then the device transitions
		 * to the DKIO_DEV_GONE state.
		 */
		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
		    flag) != 0)
			return (EFAULT);

		mutex_enter(&lsp->ls_vp_lock);
		while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) &&
		    !lsp->ls_cleanup) {
			/*
			 * By virtue of having the device open, we know that
			 * 'lsp' will remain valid when we return.
			 */
			if (!cv_wait_sig(&lsp->ls_vp_cv, &lsp->ls_vp_lock)) {
				mutex_exit(&lsp->ls_vp_lock);
				return (EINTR);
			}
		}

		dkstate = (!lsp->ls_cleanup && lsp->ls_vp != NULL ?
		    DKIO_INSERTED : DKIO_DEV_GONE);
		mutex_exit(&lsp->ls_vp_lock);

		if (ddi_copyout(&dkstate, (void *)arg,
		    sizeof (dkstate), flag) != 0)
			return (EFAULT);
		return (0);
	case USCSICMD: {
		struct uscsi_cmd uscmd;
		union scsi_cdb cdb;

		if (uscsi_is_inquiry(arg, flag, &cdb, &uscmd) == 0) {
			struct scsi_inquiry inq = {0};

			lofi_create_inquiry(lsp, &inq);
			if (ddi_copyout(&inq, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		} else if (cdb.scc_cmd == SCMD_READ_CAPACITY) {
			struct scsi_capacity capacity;

			capacity.capacity =
			    BE_32((lsp->ls_vp_size - lsp->ls_crypto_offset) >>
			    lsp->ls_lbshift);
			capacity.lbasize = BE_32(1 << lsp->ls_lbshift);
			if (ddi_copyout(&capacity, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		}

		uscmd.uscsi_rqstatus = 0xff;
#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct uscsi_cmd32 ucmd32;

			uscsi_cmdtouscsi_cmd32((&uscmd), (&ucmd32));
			if (ddi_copyout(&ucmd32, (void *)arg, sizeof (ucmd32),
			    flag) != 0)
				return (EFAULT);
			break;
		}
		case DDI_MODEL_NONE:
			if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd),
			    flag) != 0)
				return (EFAULT);
			break;
		default:
			return (EFAULT);
		}
#else
		if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd), flag) != 0)
			return (EFAULT);
#endif	/* _MULTI_DATAMODEL */
		return (0);
	}
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "lofi_ioctl: %d is not implemented\n", cmd);
#endif	/* DEBUG */
		return (ENOTTY);
	}
}
static int
lofi_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	struct lofi_state *lsp;
	int rc;

	lsp = ddi_get_soft_state(lofi_statep, ddi_get_instance(dip));
	if (lsp == NULL) {
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));
	}

	rc = cmlb_prop_op(lsp->ls_cmlbhandle, dev, dip, prop_op, mod_flags,
	    name, valuep, lengthp, LOFI_PART(getminor(dev)), NULL);
	if (rc == DDI_PROP_SUCCESS)
		return (rc);

	return (ddi_prop_op(DDI_DEV_T_ANY, dip, prop_op, mod_flags,
	    name, valuep, lengthp));
}
static struct cb_ops lofi_cb_ops = {
	lofi_open,		/* open */
	lofi_close,		/* close */
	lofi_strategy,		/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lofi_read,		/* read */
	lofi_write,		/* write */
	lofi_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	lofi_prop_op,		/* prop_op */
	0,			/* streamtab */
	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	nodev,			/* async I/O read entry point */
	nodev			/* async I/O write entry point */
};

static struct dev_ops lofi_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	lofi_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lofi_attach,		/* attach */
	lofi_detach,		/* detach */
	nodev,			/* reset */
	&lofi_cb_ops,		/* driver operations */
	NULL,			/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"loopback file driver",
	&lofi_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
int
_init(void)
{
	int error;

	list_create(&lofi_list, sizeof (struct lofi_state),
	    offsetof(struct lofi_state, ls_list));

	error = ddi_soft_state_init((void **)&lofi_statep,
	    sizeof (struct lofi_state), 0);
	if (error) {
		list_destroy(&lofi_list);
		return (error);
	}

	/*
	 * The minor number is stored as id << LOFI_CMLB_SHIFT as
	 * we need to reserve space for cmlb minor numbers.
	 * This will leave out 4096 id values on 32bit kernel, which should
	 * still leave plenty of usable ids.
	 */
	lofi_id = id_space_create("lofi_id", 1,
	    (1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)));
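
	/*
	 * Illustrative encoding (macro values live in sys/lofi.h): an id N
	 * maps to minor N << LOFI_CMLB_SHIFT via LOFI_ID2MINOR(), and
	 * LOFI_MINOR2ID() recovers N from the high bits, while LOFI_PART()
	 * keeps the low LOFI_CMLB_SHIFT bits as the cmlb partition index.
	 * So, e.g., slice 2 of device id 1 would be minor
	 * (1 << LOFI_CMLB_SHIFT) | 2.
	 */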
	if (lofi_id == NULL) {
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
		return (DDI_FAILURE);
	}

	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error) {
		id_space_destroy(lofi_id);
		mutex_destroy(&lofi_lock);
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
	}

	return (error);
}
int
_fini(void)
{
	int error;

	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	mutex_exit(&lofi_lock);

	error = mod_remove(&modlinkage);
	if (error)
		return (error);

	mutex_destroy(&lofi_lock);
	id_space_destroy(lofi_id);
	ddi_soft_state_fini((void **)&lofi_statep);
	list_destroy(&lofi_list);

	return (error);
}
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}