// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/group_cpus.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scattergather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtiofs/ kset */
static struct kset *virtio_fs_kset;

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24
/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
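
/*
 * Note: in_flight counts requests currently owned by the device (plus
 * requests or forgets parked on queued_reqs awaiting dispatch_work);
 * draining a queue waits on in_flight_zero, which dec_in_flight_req()
 * completes once the count drops to zero.
 */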
/* A virtio-fs device instance */
struct virtio_fs {
	struct kobject kobj;
	struct kobject *mqs_kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};
struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight);

static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};
static int virtio_fs_parse_param(struct fs_context *fsc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax_mode = FUSE_DAX_ALWAYS;
		break;
	case OPT_DAX_ENUM:
		ctx->dax_mode = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
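
/*
 * Example usage (the "myfs" tag below is only an illustration; the real
 * tag comes from the device's config space):
 *
 *   mount -t virtiofs -o dax=inode myfs /mnt
 *
 * A bare "dax" option behaves like "dax=always".
 */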
static void virtio_fs_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	kfree(ctx);
}
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}
static ssize_t tag_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	return sysfs_emit(buf, "%s\n", fs->tag);
}

static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);

static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);

static void virtio_fs_ktype_release(struct kobject *kobj)
{
	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);

	kfree(vfs->mq_map);
	kfree(vfs->vqs);
	kfree(vfs);
}

static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};
static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
						 struct kobject *kobj)
{
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		if (kobj == fs->vqs[i].kobj)
			return &fs->vqs[i];
	}
	return NULL;
}

static ssize_t name_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);

	if (!fsvq)
		return -EINVAL;

	return sysfs_emit(buf, "%s\n", fsvq->name);
}

static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);
static ssize_t cpu_list_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
	unsigned int cpu, qid;
	const size_t size = PAGE_SIZE - 1;
	bool first = true;
	int ret = 0, pos = 0;

	if (!fsvq)
		return -EINVAL;

	qid = fsvq->vq->index;
	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid - VQ_REQUEST)) {
			if (first)
				ret = snprintf(buf + pos, size - pos, "%u", cpu);
			else
				ret = snprintf(buf + pos, size - pos, ", %u", cpu);

			if (ret >= size - pos)
				break;
			first = false;
			pos += ret;
		}
	}
	ret = snprintf(buf + pos, size + 1 - pos, "\n");
	return pos + ret;
}

static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);

static struct attribute *virtio_fs_vq_attrs[] = {
	&virtio_fs_vq_name_attr.attr,
	&virtio_fs_vq_cpu_list_attr.attr,
	NULL
};

static struct attribute_group virtio_fs_vq_attr_group = {
	.attrs = virtio_fs_vq_attrs,
};
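
/*
 * These attributes end up under the per-device kset entries created in
 * virtio_fs_add_instance()/virtio_fs_add_queues_sysfs(), i.e. roughly
 * /sys/fs/virtiofs/<device-index>/mqs/<queue-index>/{name,cpu_list}.
 */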
/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put_locked(struct virtio_fs *fs)
{
	lockdep_assert_held(&virtio_fs_mutex);

	kobject_put(&fs->kobj);
}

static void virtio_fs_put(struct virtio_fs *fs)
{
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	virtio_fs_put(vfs);
}
static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish. */
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_work(&fsvq->dispatch_work);
}
static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queues at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}
static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		kobject_put(fsvq->kobj);
	}
}

static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	char buff[12];
	int ret = 0, i, j;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];

		sprintf(buff, "%d", i);
		fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
		if (!fsvq->kobj) {
			ret = -ENOMEM;
			goto out_del;
		}

		ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
		if (ret) {
			kobject_put(fsvq->kobj);
			goto out_del;
		}
	}

	return 0;

out_del:
	for (j = 0; j < i; j++) {
		fsvq = &fs->vqs[j];
		kobject_put(fsvq->kobj);
	}
	return ret;
}
/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	int ret;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0) {
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
	 */
	fs->kobj.kset = virtio_fs_kset;
	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
	if (ret < 0)
		goto out_unlock;

	fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
	if (!fs->mqs_kobj) {
		ret = -ENOMEM;
		goto out_del;
	}

	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
	if (ret < 0)
		goto out_put;

	ret = virtio_fs_add_queues_sysfs(fs);
	if (ret)
		goto out_remove;

	list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	kobject_uevent(&fs->kobj, KOBJ_ADD);

	return 0;

out_remove:
	sysfs_remove_link(&fs->kobj, "device");
out_put:
	kobject_put(fs->mqs_kobj);
out_del:
	kobject_del(&fs->kobj);
out_unlock:
	mutex_unlock(&virtio_fs_mutex);
	return ret;
}
/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kobject_get(&fs->kobj);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}
static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}
/* Read filesystem name from virtio config into fs->tag (devm-allocated). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';

	/* While the VIRTIO specification allows any character, newlines are
	 * awkward on mount(8) command-lines and cause problems in the sysfs
	 * "tag" attr and uevent TAG= properties. Forbid them.
	 */
	if (strchr(fs->tag, '\n')) {
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
		return -EINVAL;
	}

	return 0;
}
/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq));

	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);

	spin_unlock(&fsvq->lock);
}
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);

	/* End requests that could not be sent */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}
/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;

out:
	spin_unlock(&fsvq->lock);
	return ret;
}
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
					struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}
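
/*
 * FORGET requests are fire-and-forget: they are allocated with
 * __GFP_NOFAIL in virtio_fs_send_forget() and, when the hiprio queue is
 * full, parked on queued_reqs and retried from the work item above, so
 * they are never reported back to the FUSE layer as failed.
 */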
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}
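
/*
 * argbuf layout (see also copy_args_from_argbuf() below): the in args
 * are packed back to back at the start of the buffer, followed by room
 * for the out args; page payloads are never copied here, they go into
 * the scatter-gather list directly.
 */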
/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}
static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}

	/* Try to push previously queued requests, as the queue might no longer be full */
	spin_lock(&fsvq->lock);
	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);
	spin_unlock(&fsvq->lock);
}
static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
{
	const struct cpumask *mask, *masks;
	unsigned int q, cpu;

	/* First attempt to map using existing transport layer affinities
	 * e.g. PCI MSI-X
	 */
	if (!vdev->config->get_vq_affinity)
		goto fallback;

	for (q = 0; q < fs->num_request_queues; q++) {
		mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
		if (!mask)
			goto fallback;

		for_each_cpu(cpu, mask)
			fs->mq_map[cpu] = q;
	}

	return;
fallback:
	/* Attempt to map evenly in groups over the CPUs */
	masks = group_cpus_evenly(fs->num_request_queues);
	/* If even this fails we default to all CPUs use queue zero */
	if (!masks) {
		for_each_possible_cpu(cpu)
			fs->mq_map[cpu] = 0;
		return;
	}

	for (q = 0; q < fs->num_request_queues; q++) {
		for_each_cpu(cpu, &masks[q])
			fs->mq_map[cpu] = q;
	}
	kfree(masks);
}
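
/*
 * Note: mq_map[] stores request-queue indexes relative to VQ_REQUEST;
 * virtio_fs_send_req() adds VQ_REQUEST back when it picks the virtqueue
 * for the submitting CPU.
 */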
/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strscpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	}
}
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue_info *vqs_info;
	struct virtqueue **vqs;
	/* Specify pre_vectors to ensure that the queues before the
	 * request queues (e.g. hiprio) don't claim any of the CPUs in
	 * the multi-queue mapping and interrupt affinities.
	 */
	struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	/* Truncate nr of request queues to nr_cpu_ids */
	fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
				       nr_cpu_ids);
	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
				  dev_to_node(&vdev->dev));
	vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL);
	if (!vqs || !vqs_info || !fs->mq_map) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		vqs_info[i].callback = virtio_fs_vq_done;
		vqs_info[i].name = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(vqs_info);
	kfree(vqs);
	return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
{
	vdev->config->del_vqs(vdev);
}
/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, enum dax_access_mode mode,
				    void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
			       NULL);
	if (rc < 0)
		return dax_mem2blk_err(rc);

	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}

static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.zero_page_range = virtio_fs_zero_page_range,
};
static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}

DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct dax_device *dax_dev __free(cleanup_dax) = NULL;
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
	if (IS_ERR(dax_dev)) {
		int rc = PTR_ERR(dax_dev);
		return rc == -EOPNOTSUPP ? 0 : rc;
	}

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct resource from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = no_free_ptr(dax_dev);
	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}
static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kobject_init(&fs->kobj, &virtio_fs_ktype);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	virtio_fs_map_queues(vdev, fs);

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

out:
	vdev->priv = NULL;
	kobject_put(&fs->kobj);
	return ret;
}
static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_delete_queues_sysfs(fs);
	sysfs_remove_link(&fs->kobj, "device");
	kobject_put(fs->mqs_kobj);
	kobject_del(&fs->kobj);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */
static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs = fiq->priv;
	struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
	u64 unique = fuse_get_unique(fiq);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}
static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystem aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
}
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
					unsigned int num_pages,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		this_len = min(page_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}
/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
						 size);
	}

	return total_sgs;
}
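
/*
 * Worked example: a FUSE_READ reply carried in three pages needs
 * 1 (fuse_in_header) + 1 (packed in args) + 1 (fuse_out_header) +
 * 3 (reply pages) = 6 elements; the non-page overhead is what
 * FUSE_HEADER_OVERHEAD (4) accounts for when sizing max_pages.
 */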
/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}
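
/*
 * Note: the stack_sgs/stack_sg arrays above cover the common case of
 * header-only requests (in header, packed in args, out header, packed
 * out args); anything carrying page payloads falls back to the
 * kmalloc_array() path sized by sg_count_fuse_req().
 */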
static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	unsigned int queue_id;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	int ret;

	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
		req->in.h.unique = fuse_get_unique(fiq);

	clear_bit(FR_PENDING, &req->flags);

	fs = fiq->priv;
	queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args),
		 queue_id);

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_work(&fsvq->dispatch_work);
		spin_unlock(&fsvq->lock);
		return;
	}
}
static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.send_forget	= virtio_fs_send_forget,
	.send_interrupt	= virtio_fs_send_interrupt,
	.send_req	= virtio_fs_send_req,
	.release	= virtio_fs_fiq_release,
};

static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}
static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, driver ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax_mode != FUSE_DAX_NEVER) {
		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem device does not support it.\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}

	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}
static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
	fuse_mount_destroy(fm);
}
static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}
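
/*
 * Two mounts of the same tag share a superblock: sget_fc() below uses
 * this test, which compares the virtio_fs instance stashed in
 * fc->iq.priv, so mounting an already-mounted tag reuses the existing
 * sb instead of filling a new one.
 */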
static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc = NULL;
	struct fuse_mount *fm;
	unsigned int virtqueue_size;
	int err = -EIO;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
		goto out_err;

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;

	/* Tell FUSE to split requests that exceed the virtqueue's size */
	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
				    virtqueue_size - FUSE_HEADER_OVERHEAD);

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	virtio_fs_put(fs);
	return err;
}
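
/*
 * Example of the max_pages cap above: with a 128-descriptor request
 * vring, fc->max_pages_limit is clamped to 128 - FUSE_HEADER_OVERHEAD =
 * 124 pages, so even the largest request (its pages plus the four
 * header/args descriptors) fits in a single ring submission.
 */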
static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fsc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
		return fuse_init_fs_context_submount(fsc);

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
	.fs_flags	= FS_ALLOW_IDMAP,
};
static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
	const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	add_uevent_var(env, "TAG=%s", fs->tag);
	return 0;
}

static const struct kset_uevent_ops virtio_fs_uevent_ops = {
	.uevent = virtio_fs_uevent,
};

static int __init virtio_fs_sysfs_init(void)
{
	virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
					     fs_kobj);
	if (!virtio_fs_kset)
		return -ENOMEM;
	return 0;
}

static void virtio_fs_sysfs_exit(void)
{
	kset_unregister(virtio_fs_kset);
	virtio_fs_kset = NULL;
}
static int __init virtio_fs_init(void)
{
	int ret;

	ret = virtio_fs_sysfs_init();
	if (ret < 0)
		return ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		goto sysfs_exit;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0)
		goto unregister_virtio_driver;

	return 0;

unregister_virtio_driver:
	unregister_virtio_driver(&virtio_fs_driver);
sysfs_exit:
	virtio_fs_sysfs_exit();
	return ret;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
	virtio_fs_sysfs_exit();
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);
);