// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/dax.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/group_cpus.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"

/* Used to help calculate the FUSE connection's max_pages limit for a request's
 * size. Parts of the struct fuse_req are sliced into scattergather lists in
 * addition to the pages used, so this can help account for that overhead.
 */
#define FUSE_HEADER_OVERHEAD	4

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtio_fs/ kset */
static struct kset *virtio_fs_kset;

enum {
	VQ_HIPRIO,
	VQ_REQUEST
};

#define VQ_NAME_LEN	24
/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;
	struct list_head queued_reqs;
	struct list_head end_reqs;	/* End these requests */
	struct work_struct dispatch_work;
	struct fuse_dev *fud;
	bool connected;
	long in_flight;
	struct completion in_flight_zero; /* No inflight requests */
	struct kobject *kobj;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
	struct kobject kobj;
	struct kobject *mqs_kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
	struct dax_device *dax_dev;

	unsigned int *mq_map; /* index = cpu id, value = request vq id */

	/* DAX memory window where file contents are mapped */
	void *window_kaddr;
	phys_addr_t window_phys_addr;
	size_t window_len;
};
struct virtio_fs_forget_req {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
};

struct virtio_fs_forget {
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
	struct virtio_fs_forget_req req;
};

struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight,
				 gfp_t gfp);
static const struct constant_table dax_param_enums[] = {
	{"always",	FUSE_DAX_ALWAYS },
	{"never",	FUSE_DAX_NEVER },
	{"inode",	FUSE_DAX_INODE_USER },
	{}
};

enum {
	OPT_DAX,
	OPT_DAX_ENUM,
};

static const struct fs_parameter_spec virtio_fs_parameters[] = {
	fsparam_flag("dax", OPT_DAX),
	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
	{}
};

static int virtio_fs_parse_param(struct fs_context *fsc,
				 struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_DAX:
		ctx->dax_mode = FUSE_DAX_ALWAYS;
		break;
	case OPT_DAX_ENUM:
		ctx->dax_mode = result.uint_32;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static void virtio_fs_free_fsc(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;

	kfree(ctx);
}
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
	struct virtio_fs *fs = vq->vdev->priv;

	return &fs->vqs[vq->index];
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
	fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight <= 0);
	fsvq->in_flight--;
	if (!fsvq->in_flight)
		complete(&fsvq->in_flight_zero);
}
static ssize_t tag_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	return sysfs_emit(buf, "%s\n", fs->tag);
}

static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);

static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);
static void virtio_fs_ktype_release(struct kobject *kobj)
{
	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);

	kfree(vfs->mq_map);
	kfree(vfs->vqs);
	kfree(vfs);
}

static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};
static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
						 struct kobject *kobj)
{
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		if (kobj == fs->vqs[i].kobj)
			return &fs->vqs[i];
	}
	return NULL;
}
static ssize_t name_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);

	if (!fsvq)
		return -EINVAL;
	return sysfs_emit(buf, "%s\n", fsvq->name);
}
static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);
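/*
 * Show the CPUs served by this queue: the hiprio queue (index below
 * VQ_REQUEST) is not per-CPU and reports every possible CPU, while a
 * request queue reports the CPUs that fs->mq_map points at it.
 */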
static ssize_t cpu_list_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
	unsigned int cpu, qid;
	const size_t size = PAGE_SIZE - 1;
	bool first = true;
	int ret = 0, pos = 0;

	if (!fsvq)
		return -EINVAL;

	qid = fsvq->vq->index;
	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
		if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid)) {
			if (first)
				ret = snprintf(buf + pos, size - pos, "%u", cpu);
			else
				ret = snprintf(buf + pos, size - pos, ", %u", cpu);

			if (ret >= size - pos)
				break;
			first = false;
			pos += ret;
		}
	}
	ret = snprintf(buf + pos, size + 1 - pos, "\n");
	return pos + ret;
}
static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);

static struct attribute *virtio_fs_vq_attrs[] = {
	&virtio_fs_vq_name_attr.attr,
	&virtio_fs_vq_cpu_list_attr.attr,
	NULL
};

static struct attribute_group virtio_fs_vq_attr_group = {
	.attrs = virtio_fs_vq_attrs,
};
/* Make sure virtiofs_mutex is held */
static void virtio_fs_put_locked(struct virtio_fs *fs)
{
	lockdep_assert_held(&virtio_fs_mutex);

	kobject_put(&fs->kobj);
}

static void virtio_fs_put(struct virtio_fs *fs)
{
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
	struct virtio_fs *vfs = fiq->priv;

	virtio_fs_put(vfs);
}
static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
	WARN_ON(fsvq->in_flight < 0);

	/* Wait for in flight requests to finish.*/
	spin_lock(&fsvq->lock);
	if (fsvq->in_flight) {
		/* We are holding virtio_fs_mutex. There should not be any
		 * waiters waiting for completion.
		 */
		reinit_completion(&fsvq->in_flight_zero);
		spin_unlock(&fsvq->lock);
		wait_for_completion(&fsvq->in_flight_zero);
	} else {
		spin_unlock(&fsvq->lock);
	}

	flush_work(&fsvq->done_work);
	flush_work(&fsvq->dispatch_work);
}
static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		virtio_fs_drain_queue(fsvq);
	}
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
	/* Provides mutual exclusion between ->remove and ->kill_sb
	 * paths. We don't want both of these draining queue at the
	 * same time. Current completion logic reinits completion
	 * and that means there should not be any other thread
	 * doing reinit or waiting for completion already.
	 */
	mutex_lock(&virtio_fs_mutex);
	virtio_fs_drain_all_queues_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}

static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = true;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		kobject_put(fsvq->kobj);
	}
}
static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	char buff[12];
	int i, j, ret;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];

		sprintf(buff, "%d", i);
		fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
		if (!fsvq->kobj) {
			ret = -ENOMEM;
			goto out_del;
		}

		ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
		if (ret) {
			kobject_put(fsvq->kobj);
			goto out_del;
		}
	}

	return 0;

out_del:
	for (j = 0; j < i; j++) {
		fsvq = &fs->vqs[j];
		kobject_put(fsvq->kobj);
	}
	return ret;
}
/* Add a new instance to the list or return -EEXIST if tag name exists*/
static int virtio_fs_add_instance(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	int ret;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0) {
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
	 */
	fs->kobj.kset = virtio_fs_kset;
	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
	if (ret < 0)
		goto out_unlock;

	fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
	if (!fs->mqs_kobj) {
		ret = -ENOMEM;
		goto out_del;
	}

	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
	if (ret < 0)
		goto out_put;

	ret = virtio_fs_add_queues_sysfs(fs);
	if (ret)
		goto out_remove;

	list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	kobject_uevent(&fs->kobj, KOBJ_ADD);

	return 0;

out_remove:
	sysfs_remove_link(&fs->kobj, "device");
out_put:
	kobject_put(fs->mqs_kobj);
out_del:
	kobject_del(&fs->kobj);
out_unlock:
	mutex_unlock(&virtio_fs_mutex);
	return ret;
}
/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
	struct virtio_fs *fs;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kobject_get(&fs->kobj);
			goto found;
		}
	}

	fs = NULL; /* not found */

found:
	mutex_unlock(&virtio_fs_mutex);

	return fs;
}

static void virtio_fs_free_devs(struct virtio_fs *fs)
{
	unsigned int i;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		if (!fsvq->fud)
			continue;

		fuse_dev_free(fsvq->fud);
		fsvq->fud = NULL;
	}
}
/* Read filesystem name from virtio config into fs->tag (must kfree()). */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)];

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';

	/* While the VIRTIO specification allows any character, newlines are
	 * awkward on mount(8) command-lines and cause problems in the sysfs
	 * "tag" attr and uevent TAG= properties. Forbid them.
	 */
	if (strchr(fs->tag, '\n')) {
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
		return -EINVAL;
	}

	dev_info(&vdev->dev, "discovered new tag: %s\n", fs->tag);
	return 0;
}
/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq));

	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);

	spin_unlock(&fsvq->lock);
}
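/*
 * Work function for a request queue's dispatch_work: first end the
 * requests parked on ->end_reqs, then retry submission of requests that
 * were left on ->queued_reqs because the virtqueue was full.
 */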
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);
		fuse_request_end(req);
	}

	/* Dispatch pending requests */
	while (1) {
		unsigned int flags;

		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		flags = memalloc_nofs_save();
		ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL);
		memalloc_nofs_restore(flags);
		if (ret < 0) {
			if (ret == -ENOSPC) {
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				spin_unlock(&fsvq->lock);
				return;
			}
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(req);
		}
	}
}
/*
 * Returns 1 if queue is full and sender should wait a bit before sending
 * next request, 0 otherwise.
 */
static int send_forget_request(struct virtio_fs_vq *fsvq,
			       struct virtio_fs_forget *forget,
			       bool in_flight)
{
	struct scatterlist sg;
	struct virtqueue *vq;
	int ret = 0;
	bool notify;
	struct virtio_fs_forget_req *req = &forget->req;

	spin_lock(&fsvq->lock);
	if (!fsvq->connected) {
		if (in_flight)
			dec_in_flight_req(fsvq);
		kfree(forget);
		goto out;
	}

	sg_init_one(&sg, req, sizeof(*req));
	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			if (!in_flight)
				inc_in_flight_req(fsvq);
			/* Queue is full */
			ret = 1;
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
			if (in_flight)
				dec_in_flight_req(fsvq);
		}
		goto out;
	}

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);
	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
	return ret;
out:
	spin_unlock(&fsvq->lock);
	return ret;
}
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work);
	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
						  struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		spin_unlock(&fsvq->lock);
		if (send_forget_request(fsvq, forget, true))
			return;
	}
}
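/*
 * FUSE arguments may live on the caller's stack and cannot be handed to
 * the virtqueue as scatter-gather entries, so the non-page args are
 * staged through a contiguous bounce buffer (req->argbuf) in both
 * directions.
 */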
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	req->argbuf = kmalloc(len, gfp);
	if (!req->argbuf)
		return -ENOMEM;

	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
/* Work function for request completion */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct folio *folio;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_folios; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				folio = ap->folios[i];
				folio_zero_segment(folio, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(req);
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}

static void virtio_fs_complete_req_work(struct work_struct *work)
{
	struct virtio_fs_req_work *w =
		container_of(work, typeof(*w), done_work);

	virtio_fs_request_complete(w->req, w->fsvq);
	kfree(w);
}

static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}

	/* Try to push previously queued requests, as the queue might no longer be full */
	spin_lock(&fsvq->lock);
	if (!list_empty(&fsvq->queued_reqs))
		schedule_work(&fsvq->dispatch_work);
	spin_unlock(&fsvq->lock);
}
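/*
 * Build fs->mq_map (CPU id -> request queue id). Prefer the transport's
 * own interrupt affinities (e.g. PCI MSI-X vectors) and fall back to
 * spreading the CPUs evenly over the request queues with
 * group_cpus_evenly().
 */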
static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
{
	const struct cpumask *mask, *masks;
	unsigned int q, cpu;

	/* First attempt to map using existing transport layer affinities
	 * e.g. PCIe MSI-X
	 */
	if (!vdev->config->get_vq_affinity)
		goto fallback;

	for (q = 0; q < fs->num_request_queues; q++) {
		mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
		if (!mask)
			goto fallback;

		for_each_cpu(cpu, mask)
			fs->mq_map[cpu] = q + VQ_REQUEST;
	}

	return;
fallback:
	/* Attempt to map evenly in groups over the CPUs */
	masks = group_cpus_evenly(fs->num_request_queues);
	/* If even this fails we default to all CPUs use first request queue */
	if (!masks) {
		for_each_possible_cpu(cpu)
			fs->mq_map[cpu] = VQ_REQUEST;
		return;
	}

	for (q = 0; q < fs->num_request_queues; q++) {
		for_each_cpu(cpu, &masks[q])
			fs->mq_map[cpu] = q + VQ_REQUEST;
	}
	kfree(masks);
}
/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

	schedule_work(&fsvq->done_work);
}

static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
			      int vq_type)
{
	strscpy(fsvq->name, name, VQ_NAME_LEN);
	spin_lock_init(&fsvq->lock);
	INIT_LIST_HEAD(&fsvq->queued_reqs);
	INIT_LIST_HEAD(&fsvq->end_reqs);
	init_completion(&fsvq->in_flight_zero);

	if (vq_type == VQ_REQUEST) {
		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_request_dispatch_work);
	} else {
		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
		INIT_WORK(&fsvq->dispatch_work,
			  virtio_fs_hiprio_dispatch_work);
	}
}
/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
			       struct virtio_fs *fs)
{
	struct virtqueue_info *vqs_info;
	struct virtqueue **vqs;
	/* Specify pre_vectors to ensure that the queues before the
	 * request queues (e.g. hiprio) don't claim any of the CPUs in
	 * the multi-queue mapping and interrupt affinities
	 */
	struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
	unsigned int i;
	int ret = 0;

	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
			&fs->num_request_queues);
	if (fs->num_request_queues == 0)
		return -EINVAL;

	/* Truncate nr of request queues to nr_cpu_id */
	fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
				       nr_cpu_ids);
	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
	if (!fs->vqs)
		return -ENOMEM;

	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
	fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
				  dev_to_node(&vdev->dev));
	vqs_info = kcalloc(fs->nvqs, sizeof(*vqs_info), GFP_KERNEL);
	if (!vqs || !vqs_info || !fs->mq_map) {
		ret = -ENOMEM;
		goto out;
	}

	/* Initialize the hiprio/forget request virtqueue */
	vqs_info[VQ_HIPRIO].callback = virtio_fs_vq_done;
	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
	vqs_info[VQ_HIPRIO].name = fs->vqs[VQ_HIPRIO].name;

	/* Initialize the requests virtqueues */
	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
		char vq_name[VQ_NAME_LEN];

		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
		vqs_info[i].callback = virtio_fs_vq_done;
		vqs_info[i].name = fs->vqs[i].name;
	}

	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, vqs_info, &desc);
	if (ret < 0)
		goto out;

	for (i = 0; i < fs->nvqs; i++)
		fs->vqs[i].vq = vqs[i];

	virtio_fs_start_all_queues(fs);
out:
	kfree(vqs_info);
	kfree(vqs);
	if (ret) {
		kfree(fs->vqs);
		kfree(fs->mq_map);
	}
	return ret;
}
/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev)
{
	vdev->config->del_vqs(vdev);
}

/* Map a window offset to a page frame number. The window offset will have
 * been produced by .iomap_begin(), which maps a file offset to a window
 * offset.
 */
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				    long nr_pages, enum dax_access_mode mode,
				    void **kaddr, pfn_t *pfn)
{
	struct virtio_fs *fs = dax_get_private(dax_dev);
	phys_addr_t offset = PFN_PHYS(pgoff);
	size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff;

	if (kaddr)
		*kaddr = fs->window_kaddr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
				     PFN_DEV | PFN_MAP);
	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}

static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
				     pgoff_t pgoff, size_t nr_pages)
{
	long rc;
	void *kaddr;

	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
			       NULL);
	if (rc < 0)
		return dax_mem2blk_err(rc);

	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
	return 0;
}

static const struct dax_operations virtio_fs_dax_ops = {
	.direct_access = virtio_fs_direct_access,
	.zero_page_range = virtio_fs_zero_page_range,
};
static void virtio_fs_cleanup_dax(void *data)
{
	struct dax_device *dax_dev = data;

	kill_dax(dax_dev);
	put_dax(dax_dev);
}

DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
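/*
 * Set up the DAX window: allocate a dax_device, look up the device's
 * shared memory region that backs the cache and memremap it so file
 * contents can be mapped directly.
 */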
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
	struct dax_device *dax_dev __free(cleanup_dax) = NULL;
	struct virtio_shm_region cache_reg;
	struct dev_pagemap *pgmap;
	bool have_cache;

	if (!IS_ENABLED(CONFIG_FUSE_DAX))
		return 0;

	dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
	if (IS_ERR(dax_dev)) {
		int rc = PTR_ERR(dax_dev);
		return rc == -EOPNOTSUPP ? 0 : rc;
	}

	/* Get cache region */
	have_cache = virtio_get_shm_region(vdev, &cache_reg,
					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
	if (!have_cache) {
		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
		return 0;
	}

	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
				     dev_name(&vdev->dev))) {
		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
			 cache_reg.addr, cache_reg.len);
		return -EBUSY;
	}

	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
		   cache_reg.addr);

	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->type = MEMORY_DEVICE_FS_DAX;

	/* Ideally we would directly use the PCI BAR resource but
	 * devm_memremap_pages() wants its own copy in pgmap. So
	 * initialize a struct resource from scratch (only the start
	 * and end fields will be used).
	 */
	pgmap->range = (struct range) {
		.start = (phys_addr_t) cache_reg.addr,
		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
	};
	pgmap->nr_range = 1;

	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
	if (IS_ERR(fs->window_kaddr))
		return PTR_ERR(fs->window_kaddr);

	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
	fs->window_len = (phys_addr_t) cache_reg.len;

	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);

	fs->dax_dev = no_free_ptr(dax_dev);
	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
					fs->dax_dev);
}
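/*
 * Device probe: read the tag, set up the virtqueues and the CPU-to-queue
 * mapping, optionally set up DAX, bring the device online and register
 * the instance in sysfs and on the global instance list.
 */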
static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kobject_init(&fs->kobj, &virtio_fs_ktype);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	virtio_fs_map_queues(vdev, fs);

	ret = virtio_fs_setup_dax(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(vdev, fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

out:
	vdev->priv = NULL;
	kobject_put(&fs->kobj);
	return ret;
}

static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
	struct virtio_fs_vq *fsvq;
	int i;

	for (i = 0; i < fs->nvqs; i++) {
		fsvq = &fs->vqs[i];
		spin_lock(&fsvq->lock);
		fsvq->connected = false;
		spin_unlock(&fsvq->lock);
	}
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	virtio_fs_delete_queues_sysfs(fs);
	sysfs_remove_link(&fs->kobj, "device");
	kobject_put(fs->mqs_kobj);
	kobject_del(&fs->kobj);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put_locked(fs);
	mutex_unlock(&virtio_fs_mutex);
}
#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
	/* TODO need to restore state here */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
	{},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_forget_req *req;
	struct virtio_fs *fs = fiq->priv;
	struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
	u64 unique = fuse_get_unique(fiq);

	/* Allocate a buffer for the request */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
	req = &forget->req;

	req->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*req),
	};
	req->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	send_forget_request(fsvq, forget, false);
	kfree(link);
}
static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
}
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs,
					 unsigned int num_folios,
					 unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_folios && total_len; i++) {
		this_len = min(folio_descs[i].length, total_len);
		total_len -= this_len;
	}

	return i;
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int size, total_sgs = 1 /* fuse_in_header */;

	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages) {
		size = args->in_args[args->in_numargs - 1].size;
		total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
						  size);
	}

	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages) {
		size = args->out_args[args->out_numargs - 1].size;
		total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
						  size);
	}

	return total_sgs;
}

/* Add folios to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_folios(struct scatterlist *sg,
					struct folio **folios,
					struct fuse_folio_desc *folio_descs,
					unsigned int num_folios,
					unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	for (i = 0; i < num_folios && total_len; i++) {
		sg_init_table(&sg[i], 1);
		this_len = min(folio_descs[i].length, total_len);
		sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	if (argpages)
		total_sgs += sg_init_fuse_folios(&sg[total_sgs],
						 ap->folios, ap->descs,
						 ap->num_folios,
						 args[numargs - 1].size);

	if (len_used)
		*len_used = len;

	return total_sgs;
}
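/*
 * A request is laid out on the virtqueue as device-readable elements
 * (fuse_in_header, the in-args bounce buffer, then any in folios)
 * followed, when a reply is expected, by device-writable elements
 * (fuse_out_header, the out-args bounce buffer, then any out folios).
 */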
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
				 struct fuse_req *req, bool in_flight,
				 gfp_t gfp)
{
	/* requests need at least 4 elements */
	struct scatterlist *stack_sgs[6];
	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
	struct scatterlist **sgs = stack_sgs;
	struct scatterlist *sg = stack_sg;
	struct virtqueue *vq;
	struct fuse_args *args = req->args;
	unsigned int argbuf_used = 0;
	unsigned int out_sgs = 0;
	unsigned int in_sgs = 0;
	unsigned int total_sgs;
	unsigned int i;
	int ret;
	bool notify;
	struct fuse_pqueue *fpq;

	/* Does the sglist fit on the stack? */
	total_sgs = sg_count_fuse_req(req);
	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
		sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
		if (!sgs || !sg) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Use a bounce buffer since stack args cannot be mapped */
	ret = copy_args_to_argbuf(req, gfp);
	if (ret < 0)
		goto out;

	/* Request elements */
	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
				     (struct fuse_arg *)args->in_args,
				     args->in_numargs, args->in_pages,
				     req->argbuf, &argbuf_used);

	/* Reply elements */
	if (test_bit(FR_ISREPLY, &req->flags)) {
		sg_init_one(&sg[out_sgs + in_sgs++],
			    &req->out.h, sizeof(req->out.h));
		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
					    args->out_args, args->out_numargs,
					    args->out_pages,
					    req->argbuf + argbuf_used, NULL);
	}

	WARN_ON(out_sgs + in_sgs != total_sgs);

	for (i = 0; i < total_sgs; i++)
		sgs[i] = &sg[i];

	spin_lock(&fsvq->lock);

	if (!fsvq->connected) {
		spin_unlock(&fsvq->lock);
		ret = -ENOTCONN;
		goto out;
	}

	vq = fsvq->vq;
	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
	if (ret < 0) {
		spin_unlock(&fsvq->lock);
		goto out;
	}

	/* Request successfully sent. */
	fpq = &fsvq->fud->pq;
	spin_lock(&fpq->lock);
	list_add_tail(&req->list, fpq->processing);
	spin_unlock(&fpq->lock);
	set_bit(FR_SENT, &req->flags);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();

	if (!in_flight)
		inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);

out:
	if (ret < 0 && req->argbuf) {
		kfree(req->argbuf);
		req->argbuf = NULL;
	}
	if (sgs != stack_sgs) {
		kfree(sgs);
		kfree(sg);
	}

	return ret;
}
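/*
 * fuse_iqueue_ops->send_req: pick the request queue mapped to the
 * submitting CPU and enqueue there. On -ENOSPC the request is parked on
 * ->queued_reqs for the dispatch worker to retry; other errors are also
 * ended from the worker because requests cannot be ended in submission
 * context.
 */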
static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	unsigned int queue_id;
	struct virtio_fs *fs;
	struct virtio_fs_vq *fsvq;
	int ret;

	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
		req->in.h.unique = fuse_get_unique(fiq);

	clear_bit(FR_PENDING, &req->flags);

	fs = fiq->priv;
	queue_id = fs->mq_map[raw_smp_processor_id()];

	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
		 __func__, req->in.h.opcode, req->in.h.unique,
		 req->in.h.nodeid, req->in.h.len,
		 fuse_len_args(req->args->out_numargs, req->args->out_args),
		 queue_id);

	fsvq = &fs->vqs[queue_id];
	ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOSPC) {
			/*
			 * Virtqueue full. Retry submission from worker
			 * context as we might be holding fc->bg_lock.
			 */
			spin_lock(&fsvq->lock);
			list_add_tail(&req->list, &fsvq->queued_reqs);
			inc_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			return;
		}
		req->out.h.error = ret;
		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

		/* Can't end request in submission context. Use a worker */
		spin_lock(&fsvq->lock);
		list_add_tail(&req->list, &fsvq->end_reqs);
		schedule_work(&fsvq->dispatch_work);
		spin_unlock(&fsvq->lock);
		return;
	}
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
	.send_forget	= virtio_fs_send_forget,
	.send_interrupt	= virtio_fs_send_interrupt,
	.send_req	= virtio_fs_send_req,
	.release	= virtio_fs_fiq_release,
};
static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
{
	ctx->rootmode = S_IFDIR;
	ctx->default_permissions = 1;
	ctx->allow_other = 1;
	ctx->max_read = UINT_MAX;
	ctx->blksize = 512;
	ctx->destroy = true;
	ctx->no_control = true;
	ctx->no_force_umount = true;
}
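/*
 * Fill the superblock for a virtiofs mount: allocate one fuse_dev per
 * virtqueue, hook up the (optional) DAX device, run the common FUSE
 * fill_super code, then install the fuse devices and restart queues that
 * a previous unmount may have stopped.
 */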
static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *fs = fc->iq.priv;
	struct fuse_fs_context *ctx = fsc->fs_private;
	unsigned int i;
	int err;

	virtio_fs_ctx_set_defaults(ctx);
	mutex_lock(&virtio_fs_mutex);

	/* After holding mutex, make sure virtiofs device is still there.
	 * Though we are holding a reference to it, drive ->remove might
	 * still have cleaned up virtual queues. In that case bail out.
	 */
	err = -EINVAL;
	if (list_empty(&fs->list)) {
		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
		goto err;
	}

	err = -ENOMEM;
	/* Allocate fuse_dev for hiprio and notification queues */
	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fsvq->fud = fuse_dev_alloc();
		if (!fsvq->fud)
			goto err_free_fuse_devs;
	}

	/* virtiofs allocates and installs its own fuse devices */
	ctx->fudptr = NULL;
	if (ctx->dax_mode != FUSE_DAX_NEVER) {
		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
			err = -EINVAL;
			pr_err("virtio-fs: dax can't be enabled as filesystem"
			       " device does not support it.\n");
			goto err_free_fuse_devs;
		}
		ctx->dax_dev = fs->dax_dev;
	}
	err = fuse_fill_super_common(sb, ctx);
	if (err < 0)
		goto err_free_fuse_devs;

	for (i = 0; i < fs->nvqs; i++) {
		struct virtio_fs_vq *fsvq = &fs->vqs[i];

		fuse_dev_install(fsvq->fud, fc);
	}

	/* Previous unmount will stop all queues. Start these again */
	virtio_fs_start_all_queues(fs);
	fuse_send_init(fm);
	mutex_unlock(&virtio_fs_mutex);
	return 0;

err_free_fuse_devs:
	virtio_fs_free_devs(fs);
err:
	mutex_unlock(&virtio_fs_mutex);
	return err;
}
static void virtio_fs_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;
	struct virtio_fs *vfs = fc->iq.priv;
	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];

	/* Stop dax worker. Soon evict_inodes() will be called which
	 * will free all memory ranges belonging to all inodes.
	 */
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_cancel_work(fc);

	/* Stop forget queue. Soon destroy will be sent */
	spin_lock(&fsvq->lock);
	fsvq->connected = false;
	spin_unlock(&fsvq->lock);
	virtio_fs_drain_all_queues(vfs);

	fuse_conn_destroy(fm);

	/* fuse_conn_destroy() must have sent destroy. Stop all queues
	 * and drain one more time and free fuse devices. Freeing fuse
	 * devices will drop their reference on fuse_conn and that in
	 * turn will drop its reference on virtio_fs object.
	 */
	virtio_fs_stop_all_queues(vfs);
	virtio_fs_drain_all_queues(vfs);
	virtio_fs_free_devs(vfs);
}

static void virtio_kill_sb(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	bool last;

	/* If mount failed, we can still be called without any fc */
	if (sb->s_root) {
		last = fuse_mount_remove(fm);
		if (last)
			virtio_fs_conn_destroy(fm);
	}
	kill_anon_super(sb);
	fuse_mount_destroy(fm);
}

static int virtio_fs_test_super(struct super_block *sb,
				struct fs_context *fsc)
{
	struct fuse_mount *fsc_fm = fsc->s_fs_info;
	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);

	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
}
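/*
 * Look up the virtio_fs instance whose tag matches the mount source and
 * build (or reuse) a superblock for it. max_pages is capped so that a
 * single request's scatter-gather list always fits in the virtqueue.
 */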
static int virtio_fs_get_tree(struct fs_context *fsc)
{
	struct virtio_fs *fs;
	struct super_block *sb;
	struct fuse_conn *fc = NULL;
	struct fuse_mount *fm;
	unsigned int virtqueue_size;
	int err = -EIO;

	/* This gets a reference on virtio_fs object. This ptr gets installed
	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
	 * to drop the reference to this object.
	 */
	fs = virtio_fs_find_instance(fsc->source);
	if (!fs) {
		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
		return -EINVAL;
	}

	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
		goto out_err;

	err = -ENOMEM;
	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
	if (!fc)
		goto out_err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		goto out_err;

	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
	fc->release = fuse_free_conn;
	fc->delete_stale = true;
	fc->auto_submounts = true;
	fc->sync_fs = true;
	fc->use_pages_for_kvec_io = true;

	/* Tell FUSE to split requests that exceed the virtqueue's size */
	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
				    virtqueue_size - FUSE_HEADER_OVERHEAD);

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		err = virtio_fs_fill_super(sb, fsc);
		if (err) {
			deactivate_locked_super(sb);
			return err;
		}

		sb->s_flags |= SB_ACTIVE;
	}

	WARN_ON(fsc->root);
	fsc->root = dget(sb->s_root);
	return 0;

out_err:
	kfree(fc);
	virtio_fs_put(fs);
	return err;
}
static const struct fs_context_operations virtio_fs_context_ops = {
	.free		= virtio_fs_free_fsc,
	.parse_param	= virtio_fs_parse_param,
	.get_tree	= virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx;

	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
		return fuse_init_fs_context_submount(fsc);

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	fsc->fs_private = ctx;
	fsc->ops = &virtio_fs_context_ops;
	return 0;
}

static struct file_system_type virtio_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "virtiofs",
	.init_fs_context = virtio_fs_init_fs_context,
	.kill_sb	= virtio_kill_sb,
	.fs_flags	= FS_ALLOW_IDMAP,
};

static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
	const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	add_uevent_var(env, "TAG=%s", fs->tag);
	return 0;
}
static const struct kset_uevent_ops virtio_fs_uevent_ops = {
	.uevent = virtio_fs_uevent,
};

static int __init virtio_fs_sysfs_init(void)
{
	virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
					     fs_kobj);
	if (!virtio_fs_kset)
		return -ENOMEM;
	return 0;
}

static void virtio_fs_sysfs_exit(void)
{
	kset_unregister(virtio_fs_kset);
	virtio_fs_kset = NULL;
}
static int __init virtio_fs_init(void)
{
	int ret;

	ret = virtio_fs_sysfs_init();
	if (ret < 0)
		return ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		goto sysfs_exit;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0)
		goto unregister_virtio_driver;

	return 0;

unregister_virtio_driver:
	unregister_virtio_driver(&virtio_fs_driver);
sysfs_exit:
	virtio_fs_sysfs_exit();
	return ret;
}
module_init(virtio_fs_init);
static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
	virtio_fs_sysfs_exit();
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);