// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/pfn_t.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include "fuse_i.h"
/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion between the device removal and mounting paths.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24
/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct delayed_work dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};
struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on the virtqueue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

enum {
	OPT_DAX
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	{}
};

static int virtio_fs_parse_param(struct fs_context *fc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	int opt;

	opt = fs_parse(fc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax = 1;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
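/*
 * Example usage (assuming the device exports a hypothetical tag "myfs"):
 *
 *   mount -t virtiofs myfs /mnt -o dax
 *
 * The source ("myfs") is matched against fs->tag read from the device config
 * and the optional "dax" flag is handled by virtio_fs_parse_param() above.
 */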
static void virtio_fs_free_fc(struct fs_context *fc)
{
	struct fuse_fs_context *ctx = fc->fs_private;

	kfree(ctx);
}

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
	return &vq_to_fsvq(vq)->fud->pq;
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}
/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}
static void release_virtio_fs_obj(struct kref *ref)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

	kfree(vfs->vqs);
	kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(vfs);
	mutex_unlock(&virtio_fs_mutex);
}
static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_delayed_work(&fsvq->dispatch_work);
}
static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queues at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}
static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}
/* Add a new instance to the list or return -EEXIST if the tag already exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	return 0;
}
/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			goto found;
		}
	}

	fs = NULL; /* not found */
found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}
static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}
/* Read filesystem name from virtio config into fs->tag (devm-allocated). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}
/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}
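/*
 * Work function for the request dispatch worker: first end any requests
 * parked on ->end_reqs, then retry submission of requests that were queued
 * on ->queued_reqs because the virtqueue was full.
 */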
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}
/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}
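/*
 * Work function for the hiprio dispatch worker: resend FUSE_FORGET requests
 * that were queued on ->queued_reqs because the hiprio virtqueue was full.
 */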
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}
/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}
static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}
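/*
 * Work function for the request queue completion worker: collect completed
 * requests off the virtqueue and complete them, deferring may_block
 * requests to a separate worker.
 */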
static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}
}
/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}
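/*
 * Initialize per-virtqueue state and pick the done/dispatch work functions
 * based on whether this is the hiprio queue or a request queue.
 */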
static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strncpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_DELAYED_WORK(&fsvq->dispatch_work,
				  virtio_fs_hiprio_dispatch_work);
	}
}
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue **vqs;
	vq_callback_t **callbacks;
	const char **names;
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
				  GFP_KERNEL);
	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
	if (!vqs || !callbacks || !names) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		callbacks[i] = virtio_fs_vq_done;
		names[i] = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(names);
	kfree(callbacks);
	kfree(vqs);
	if (ret)
		kfree(fs->vqs);
	return ret;
}
/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	vdev->config->del_vqs(vdev);
}
/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev,
				       pgoff_t pgoff, void *addr,
				       size_t bytes, struct iov_iter *i)
{
	return copy_from_iter(addr, bytes, i);
}

static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev,
				     pgoff_t pgoff, void *addr,
				     size_t bytes, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}
static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
	if (rc < 0)
		return rc;
	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}
static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.copy_from_iter = virtio_fs_copy_from_iter,
	.copy_to_iter = virtio_fs_copy_to_iter,
	.zero_page_range = virtio_fs_zero_page_range,
};
static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}
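/*
 * Set up the DAX memory window: find the cache shared memory region,
 * memremap it, and register a dax_device on top of it.
 */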
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct resource from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0);
	if (IS_ERR(fs->dax_dev))
		return PTR_ERR(fs->dax_dev);

	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}
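/*
 * Device probe: read the tag, set up virtqueues and the optional DAX
 * window, then make the instance visible on virtio_fs_instances.
 */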
static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);
	kfree(fs->vqs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}
static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}
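/*
 * Device removal: take the instance off the list, stop and drain all
 * queues, reset the device and drop the initial reference.
 */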
static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}
#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
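/*
 * fuse_iqueue_ops callback: dequeue a FORGET from the fuse input queue and
 * send it on the hiprio virtqueue, dropping fiq->lock before allocating.
 */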
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	u64 unique;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}
static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
					unsigned int num_pages,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		this_len = min(page_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}
/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	return total_sgs;
}
/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}
/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}
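/*
 * fuse_iqueue_ops callback: pull the pending request off fiq->pending and
 * submit it on a request virtqueue; on -ENOMEM/-ENOSPC the request is
 * queued and retried from the dispatch worker.
 */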
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
	struct virtio_fs *fs;
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	int ret;

	WARN_ON(list_empty(&fiq->pending));
	req = list_last_entry(&fiq->pending, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	WARN_ON(!list_empty(&fiq->pending));
	spin_unlock(&fiq->lock);

	fs = fiq->priv;

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args));

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			schedule_delayed_work(&fsvq->dispatch_work,
					      msecs_to_jiffies(1));
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_delayed_work(&fsvq->dispatch_work, 0);
		spin_unlock(&fsvq->lock);
		return;
	}
}
static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
	.release			= virtio_fs_fiq_release,
};
static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}
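/*
 * Fill the superblock: allocate one fuse_dev per virtqueue, run the common
 * FUSE fill_super path, then install the devices and (re)start the queues.
 */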
static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, drive ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax)
		ctx->dax_dev = fs->dax_dev;
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}
static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}
static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (fm) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
}
static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}
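/*
 * Look up the virtio_fs instance named by fsc->source, create (or reuse) a
 * superblock for it and set up the fuse_conn/fuse_mount pair.
 */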
static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	int err;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
		       &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info) {
		fuse_conn_put(fc);
		kfree(fm);
	}
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			fuse_conn_put(fc);
			kfree(fm);
			sb->s_fs_info = NULL;
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
	return err;
}
static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};
static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}
static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
};
static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		return ret;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}

	return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);
MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);