1 // SPDX-License-Identifier: GPL-2.0-only
5 * Copyright (C) 1997 Richard Günther
7 * binfmt_misc detects binaries via a magic or filename extension and invokes
8 * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details.
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/sched/mm.h>
17 #include <linux/magic.h>
18 #include <linux/binfmts.h>
19 #include <linux/slab.h>
20 #include <linux/ctype.h>
21 #include <linux/string_helpers.h>
22 #include <linux/file.h>
23 #include <linux/pagemap.h>
24 #include <linux/namei.h>
25 #include <linux/mount.h>
26 #include <linux/fs_context.h>
27 #include <linux/syscalls.h>
29 #include <linux/uaccess.h>
40 VERBOSE_STATUS
= 1 /* make it zero to save 400 bytes kernel memory */
43 enum {Enabled
, Magic
};
/*
 * Per-entry special flags, stored in the high bits of Node.flags.
 * They are set by check_special_flags() while parsing the 'flags' field
 * of a register string.
 */
/* 'P' (preserve argv0): load_misc_binary() sets BINPRM_FLAGS_PRESERVE_ARGV0. */
44 #define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
/* 'O' (open binary): load_misc_binary() sets bprm->have_execfd. */
45 #define MISC_FMT_OPEN_BINARY (1UL << 30)
/*
 * 'C' (preserve creds): load_misc_binary() sets bprm->execfd_creds.
 * check_special_flags() sets MISC_FMT_OPEN_BINARY together with this flag.
 */
46 #define MISC_FMT_CREDENTIALS (1UL << 29)
/*
 * 'F' (open interpreter file now): bm_register_write() opens the interpreter
 * with open_exec() at registration time; load_misc_binary() then clones
 * Node.interp_file instead of opening the interpreter path, and
 * put_binfmt_handler() closes it when the last reference is dropped.
 */
47 #define MISC_FMT_OPEN_FILE (1UL << 28)
50 struct list_head list
;
51 unsigned long flags
; /* type, status, etc. */
52 int offset
; /* offset of magic */
53 int size
; /* size of magic/mask */
54 char *magic
; /* magic or filename extension */
55 char *mask
; /* mask, NULL for exact match */
56 const char *interpreter
; /* filename of interpreter */
58 struct dentry
*dentry
;
59 struct file
*interp_file
;
60 refcount_t users
; /* sync removal with load_misc_binary() */
63 static struct file_system_type bm_fs_type
;
66 * Max length of the register string. Determined by:
70 * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
71 * - magic: 128 bytes (512 in escaped form)
72 * - mask: 128 bytes (512 in escaped form)
75 * Round that up a bit, and then back off to hold the internal data
78 #define MAX_REGISTER_LENGTH 1920
81 * search_binfmt_handler - search for a binary handler for @bprm
82 * @misc: handle to binfmt_misc instance
83 * @bprm: binary for which we are looking for a handler
85 * Search for a binary type handler for @bprm in the list of registered binary
88 * Return: binary type list entry on success, NULL on failure
90 static Node
*search_binfmt_handler(struct binfmt_misc
*misc
,
91 struct linux_binprm
*bprm
)
93 char *p
= strrchr(bprm
->interp
, '.');
96 /* Walk all the registered handlers. */
97 list_for_each_entry(e
, &misc
->entries
, list
) {
101 /* Make sure this one is currently enabled. */
102 if (!test_bit(Enabled
, &e
->flags
))
105 /* Do matching based on extension if applicable. */
106 if (!test_bit(Magic
, &e
->flags
)) {
107 if (p
&& !strcmp(e
->magic
, p
+ 1))
112 /* Do matching based on magic & mask. */
113 s
= bprm
->buf
+ e
->offset
;
115 for (j
= 0; j
< e
->size
; j
++)
116 if ((*s
++ ^ e
->magic
[j
]) & e
->mask
[j
])
119 for (j
= 0; j
< e
->size
; j
++)
120 if ((*s
++ ^ e
->magic
[j
]))
131 * get_binfmt_handler - try to find a binary type handler
132 * @misc: handle to binfmt_misc instance
133 * @bprm: binary for which we are looking for a handler
135 * Try to find a binfmt handler for the binary type. If one is found take a
136 * reference to protect against removal via bm_{entry,status}_write().
138 * Return: binary type list entry on success, NULL on failure
140 static Node
*get_binfmt_handler(struct binfmt_misc
*misc
,
141 struct linux_binprm
*bprm
)
145 read_lock(&misc
->entries_lock
);
146 e
= search_binfmt_handler(misc
, bprm
);
148 refcount_inc(&e
->users
);
149 read_unlock(&misc
->entries_lock
);
154 * put_binfmt_handler - put binary handler node
157 * Free node syncing with load_misc_binary() and defer final free to
158 * load_misc_binary() in case it is using the binary type handler we were
159 * requested to remove.
161 static void put_binfmt_handler(Node
*e
)
163 if (refcount_dec_and_test(&e
->users
)) {
164 if (e
->flags
& MISC_FMT_OPEN_FILE
)
165 filp_close(e
->interp_file
, NULL
);
171 * load_binfmt_misc - load the binfmt_misc of the caller's user namespace
173 * To be called in load_misc_binary() to load the relevant struct binfmt_misc.
174 * If a user namespace doesn't have its own binfmt_misc mount it can make use
175 * of its ancestor's binfmt_misc handlers. This mimics the behavior of
176 * pre-namespaced binfmt_misc where all registered binfmt_misc handlers were
177 * available to all users and user namespaces on the system.
179 * Return: the binfmt_misc instance of the caller's user namespace
181 static struct binfmt_misc
*load_binfmt_misc(void)
183 const struct user_namespace
*user_ns
;
184 struct binfmt_misc
*misc
;
186 user_ns
= current_user_ns();
188 /* Pairs with smp_store_release() in bm_fill_super(). */
189 misc
= smp_load_acquire(&user_ns
->binfmt_misc
);
193 user_ns
= user_ns
->parent
;
196 return &init_binfmt_misc
;
202 static int load_misc_binary(struct linux_binprm
*bprm
)
205 struct file
*interp_file
= NULL
;
206 int retval
= -ENOEXEC
;
207 struct binfmt_misc
*misc
;
209 misc
= load_binfmt_misc();
213 fmt
= get_binfmt_handler(misc
, bprm
);
217 /* Need to be able to load the file after exec */
219 if (bprm
->interp_flags
& BINPRM_FLAGS_PATH_INACCESSIBLE
)
222 if (fmt
->flags
& MISC_FMT_PRESERVE_ARGV0
) {
223 bprm
->interp_flags
|= BINPRM_FLAGS_PRESERVE_ARGV0
;
225 retval
= remove_arg_zero(bprm
);
230 if (fmt
->flags
& MISC_FMT_OPEN_BINARY
)
231 bprm
->have_execfd
= 1;
233 /* make argv[1] be the path to the binary */
234 retval
= copy_string_kernel(bprm
->interp
, bprm
);
239 /* add the interp as argv[0] */
240 retval
= copy_string_kernel(fmt
->interpreter
, bprm
);
245 /* Update interp in case binfmt_script needs it. */
246 retval
= bprm_change_interp(fmt
->interpreter
, bprm
);
250 if (fmt
->flags
& MISC_FMT_OPEN_FILE
)
251 interp_file
= file_clone_open(fmt
->interp_file
);
253 interp_file
= open_exec(fmt
->interpreter
);
254 retval
= PTR_ERR(interp_file
);
255 if (IS_ERR(interp_file
))
258 bprm
->interpreter
= interp_file
;
259 if (fmt
->flags
& MISC_FMT_CREDENTIALS
)
260 bprm
->execfd_creds
= 1;
266 * If we actually put the node here all concurrent calls to
267 * load_misc_binary() will have finished. We also know
268 * that for the refcount to be zero someone must have concurrently
269 * removed the binary type handler from the list and it's our job to
272 put_binfmt_handler(fmt
);
277 /* Command parsers */
280 * parses and copies one argument enclosed in del from *sp to *dp,
281 * recognising the \x special.
282 * returns pointer to the copied argument or NULL in case of an
283 * error (and sets err) or null argument length.
285 static char *scanarg(char *s
, char del
)
289 while ((c
= *s
++) != del
) {
290 if (c
== '\\' && *s
== 'x') {
302 static char *check_special_flags(char *sfs
, Node
*e
)
311 pr_debug("register: flag: P (preserve argv0)\n");
313 e
->flags
|= MISC_FMT_PRESERVE_ARGV0
;
316 pr_debug("register: flag: O (open binary)\n");
318 e
->flags
|= MISC_FMT_OPEN_BINARY
;
321 pr_debug("register: flag: C (preserve creds)\n");
323 /* this flags also implies the
325 e
->flags
|= (MISC_FMT_CREDENTIALS
|
326 MISC_FMT_OPEN_BINARY
);
329 pr_debug("register: flag: F: open interpreter file now\n");
331 e
->flags
|= MISC_FMT_OPEN_FILE
;
342 * This registers a new binary format, it recognises the syntax
343 * ':name:type:offset:magic:mask:interpreter:flags'
344 * where the ':' is the IFS, that can be chosen with the first char
346 static Node
*create_entry(const char __user
*buffer
, size_t count
)
353 pr_debug("register: received %zu bytes\n", count
);
355 /* some sanity checks */
357 if ((count
< 11) || (count
> MAX_REGISTER_LENGTH
))
361 memsize
= sizeof(Node
) + count
+ 8;
362 e
= kmalloc(memsize
, GFP_KERNEL_ACCOUNT
);
366 p
= buf
= (char *)e
+ sizeof(Node
);
368 memset(e
, 0, sizeof(Node
));
369 if (copy_from_user(buf
, buffer
, count
))
372 del
= *p
++; /* delimeter */
374 pr_debug("register: delim: %#x {%c}\n", del
, del
);
376 /* Pad the buffer with the delim to simplify parsing below. */
377 memset(buf
+ count
, del
, 8);
379 /* Parse the 'name' field. */
386 !strcmp(e
->name
, ".") ||
387 !strcmp(e
->name
, "..") ||
388 strchr(e
->name
, '/'))
391 pr_debug("register: name: {%s}\n", e
->name
);
393 /* Parse the 'type' field. */
396 pr_debug("register: type: E (extension)\n");
397 e
->flags
= 1 << Enabled
;
400 pr_debug("register: type: M (magic)\n");
401 e
->flags
= (1 << Enabled
) | (1 << Magic
);
409 if (test_bit(Magic
, &e
->flags
)) {
410 /* Handle the 'M' (magic) format. */
413 /* Parse the 'offset' field. */
419 int r
= kstrtoint(p
, 10, &e
->offset
);
420 if (r
!= 0 || e
->offset
< 0)
426 pr_debug("register: offset: %#x\n", e
->offset
);
428 /* Parse the 'magic' field. */
436 print_hex_dump_bytes(
437 KBUILD_MODNAME
": register: magic[raw]: ",
438 DUMP_PREFIX_NONE
, e
->magic
, p
- e
->magic
);
440 /* Parse the 'mask' field. */
447 pr_debug("register: mask[raw]: none\n");
448 } else if (USE_DEBUG
)
449 print_hex_dump_bytes(
450 KBUILD_MODNAME
": register: mask[raw]: ",
451 DUMP_PREFIX_NONE
, e
->mask
, p
- e
->mask
);
454 * Decode the magic & mask fields.
455 * Note: while we might have accepted embedded NUL bytes from
456 * above, the unescape helpers here will stop at the first one
459 e
->size
= string_unescape_inplace(e
->magic
, UNESCAPE_HEX
);
461 string_unescape_inplace(e
->mask
, UNESCAPE_HEX
) != e
->size
)
463 if (e
->size
> BINPRM_BUF_SIZE
||
464 BINPRM_BUF_SIZE
- e
->size
< e
->offset
)
466 pr_debug("register: magic/mask length: %i\n", e
->size
);
468 print_hex_dump_bytes(
469 KBUILD_MODNAME
": register: magic[decoded]: ",
470 DUMP_PREFIX_NONE
, e
->magic
, e
->size
);
474 char *masked
= kmalloc(e
->size
, GFP_KERNEL_ACCOUNT
);
476 print_hex_dump_bytes(
477 KBUILD_MODNAME
": register: mask[decoded]: ",
478 DUMP_PREFIX_NONE
, e
->mask
, e
->size
);
481 for (i
= 0; i
< e
->size
; ++i
)
482 masked
[i
] = e
->magic
[i
] & e
->mask
[i
];
483 print_hex_dump_bytes(
484 KBUILD_MODNAME
": register: magic[masked]: ",
485 DUMP_PREFIX_NONE
, masked
, e
->size
);
492 /* Handle the 'E' (extension) format. */
494 /* Skip the 'offset' field. */
500 /* Parse the 'magic' field. */
506 if (!e
->magic
[0] || strchr(e
->magic
, '/'))
508 pr_debug("register: extension: {%s}\n", e
->magic
);
510 /* Skip the 'mask' field. */
517 /* Parse the 'interpreter' field. */
523 if (!e
->interpreter
[0])
525 pr_debug("register: interpreter: {%s}\n", e
->interpreter
);
527 /* Parse the 'flags' field. */
528 p
= check_special_flags(p
, e
);
531 if (p
!= buf
+ count
)
541 return ERR_PTR(-EFAULT
);
544 return ERR_PTR(-EINVAL
);
548 * Set status of entry/binfmt_misc:
549 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
551 static int parse_command(const char __user
*buffer
, size_t count
)
557 if (copy_from_user(s
, buffer
, count
))
561 if (s
[count
- 1] == '\n')
563 if (count
== 1 && s
[0] == '0')
565 if (count
== 1 && s
[0] == '1')
567 if (count
== 2 && s
[0] == '-' && s
[1] == '1')
574 static void entry_status(Node
*e
, char *page
)
577 const char *status
= "disabled";
579 if (test_bit(Enabled
, &e
->flags
))
582 if (!VERBOSE_STATUS
) {
583 sprintf(page
, "%s\n", status
);
587 dp
+= sprintf(dp
, "%s\ninterpreter %s\n", status
, e
->interpreter
);
589 /* print the special flags */
590 dp
+= sprintf(dp
, "flags: ");
591 if (e
->flags
& MISC_FMT_PRESERVE_ARGV0
)
593 if (e
->flags
& MISC_FMT_OPEN_BINARY
)
595 if (e
->flags
& MISC_FMT_CREDENTIALS
)
597 if (e
->flags
& MISC_FMT_OPEN_FILE
)
601 if (!test_bit(Magic
, &e
->flags
)) {
602 sprintf(dp
, "extension .%s\n", e
->magic
);
604 dp
+= sprintf(dp
, "offset %i\nmagic ", e
->offset
);
605 dp
= bin2hex(dp
, e
->magic
, e
->size
);
607 dp
+= sprintf(dp
, "\nmask ");
608 dp
= bin2hex(dp
, e
->mask
, e
->size
);
615 static struct inode
*bm_get_inode(struct super_block
*sb
, int mode
)
617 struct inode
*inode
= new_inode(sb
);
620 inode
->i_ino
= get_next_ino();
621 inode
->i_mode
= mode
;
622 simple_inode_init_ts(inode
);
628 * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode
629 * @inode: inode of the relevant binfmt_misc instance
631 * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can
632 * be done without any memory barriers because we are guaranteed that
633 * user_ns->binfmt_misc is fully initialized. It was fully initialized when the
634 * binfmt_misc mount was first created.
636 * Return: struct binfmt_misc of the relevant binfmt_misc instance
638 static struct binfmt_misc
*i_binfmt_misc(struct inode
*inode
)
640 return inode
->i_sb
->s_user_ns
->binfmt_misc
;
644 * bm_evict_inode - cleanup data associated with @inode
645 * @inode: inode to which the data is attached
647 * Cleanup the binary type handler data associated with @inode if a binary type
648 * entry is removed or the filesystem is unmounted and the super block is
651 * If the ->evict call was not caused by a super block shutdown but by a write
652 * to remove the entry or all entries via bm_{entry,status}_write() the entry
653 * will have already been removed from the list. We keep the list_empty() check
654 * to make that explicit.
656 static void bm_evict_inode(struct inode
*inode
)
658 Node
*e
= inode
->i_private
;
663 struct binfmt_misc
*misc
;
665 misc
= i_binfmt_misc(inode
);
666 write_lock(&misc
->entries_lock
);
667 if (!list_empty(&e
->list
))
668 list_del_init(&e
->list
);
669 write_unlock(&misc
->entries_lock
);
670 put_binfmt_handler(e
);
675 * unlink_binfmt_dentry - remove the dentry for the binary type handler
676 * @dentry: dentry associated with the binary type handler
678 * Do the actual filesystem work to remove a dentry for a registered binary
679 * type handler. Since binfmt_misc only allows simple files to be created
680 * directly under the root dentry of the filesystem we ensure that we are
681 * indeed passed a dentry directly beneath the root dentry, that the inode
682 * associated with the root dentry is locked, and that it is a regular file we
683 * are asked to remove.
685 static void unlink_binfmt_dentry(struct dentry
*dentry
)
687 struct dentry
*parent
= dentry
->d_parent
;
688 struct inode
*inode
, *parent_inode
;
690 /* All entries are immediate descendants of the root dentry. */
691 if (WARN_ON_ONCE(dentry
->d_sb
->s_root
!= parent
))
694 /* We only expect to be called on regular files. */
695 inode
= d_inode(dentry
);
696 if (WARN_ON_ONCE(!S_ISREG(inode
->i_mode
)))
699 /* The parent inode must be locked. */
700 parent_inode
= d_inode(parent
);
701 if (WARN_ON_ONCE(!inode_is_locked(parent_inode
)))
704 if (simple_positive(dentry
)) {
706 simple_unlink(parent_inode
, dentry
);
713 * remove_binfmt_handler - remove a binary type handler
714 * @misc: handle to binfmt_misc instance
715 * @e: binary type handler to remove
717 * Remove a binary type handler from the list of binary type handlers and
718 * remove its associated dentry. This is called from
719 * bm_{entry,status}_write(). In the future, we might want to think about
720 * adding a proper ->unlink() method to binfmt_misc instead of forcing callers
721 * to use writes to files in order to delete binary type handlers. But it has
722 * worked for so long that it's not a pressing issue.
724 static void remove_binfmt_handler(struct binfmt_misc
*misc
, Node
*e
)
726 write_lock(&misc
->entries_lock
);
727 list_del_init(&e
->list
);
728 write_unlock(&misc
->entries_lock
);
729 unlink_binfmt_dentry(e
->dentry
);
735 bm_entry_read(struct file
*file
, char __user
*buf
, size_t nbytes
, loff_t
*ppos
)
737 Node
*e
= file_inode(file
)->i_private
;
741 page
= (char *) __get_free_page(GFP_KERNEL
);
745 entry_status(e
, page
);
747 res
= simple_read_from_buffer(buf
, nbytes
, ppos
, page
, strlen(page
));
749 free_page((unsigned long) page
);
753 static ssize_t
bm_entry_write(struct file
*file
, const char __user
*buffer
,
754 size_t count
, loff_t
*ppos
)
756 struct inode
*inode
= file_inode(file
);
757 Node
*e
= inode
->i_private
;
758 int res
= parse_command(buffer
, count
);
762 /* Disable this handler. */
763 clear_bit(Enabled
, &e
->flags
);
766 /* Enable this handler. */
767 set_bit(Enabled
, &e
->flags
);
770 /* Delete this handler. */
771 inode
= d_inode(inode
->i_sb
->s_root
);
775 * In order to add new element or remove elements from the list
776 * via bm_{entry,register,status}_write() inode_lock() on the
777 * root inode must be held.
778 * The lock is exclusive ensuring that the list can't be
779 * modified. Only load_misc_binary() can access but does so
780 * read-only. So we only need to take the write lock when we
781 * actually remove the entry from the list.
783 if (!list_empty(&e
->list
))
784 remove_binfmt_handler(i_binfmt_misc(inode
), e
);
795 static const struct file_operations bm_entry_operations
= {
796 .read
= bm_entry_read
,
797 .write
= bm_entry_write
,
798 .llseek
= default_llseek
,
803 static ssize_t
bm_register_write(struct file
*file
, const char __user
*buffer
,
804 size_t count
, loff_t
*ppos
)
808 struct super_block
*sb
= file_inode(file
)->i_sb
;
809 struct dentry
*root
= sb
->s_root
, *dentry
;
810 struct binfmt_misc
*misc
;
812 struct file
*f
= NULL
;
814 e
= create_entry(buffer
, count
);
819 if (e
->flags
& MISC_FMT_OPEN_FILE
) {
820 const struct cred
*old_cred
;
823 * Now that we support unprivileged binfmt_misc mounts make
824 * sure we use the credentials that the register @file was
825 * opened with to also open the interpreter. Before that this
826 * didn't matter much as only a privileged process could open
829 old_cred
= override_creds(file
->f_cred
);
830 f
= open_exec(e
->interpreter
);
831 revert_creds(old_cred
);
833 pr_notice("register: failed to install interpreter file %s\n",
841 inode_lock(d_inode(root
));
842 dentry
= lookup_one_len(e
->name
, root
, strlen(e
->name
));
843 err
= PTR_ERR(dentry
);
848 if (d_really_is_positive(dentry
))
851 inode
= bm_get_inode(sb
, S_IFREG
| 0644);
857 refcount_set(&e
->users
, 1);
858 e
->dentry
= dget(dentry
);
859 inode
->i_private
= e
;
860 inode
->i_fop
= &bm_entry_operations
;
862 d_instantiate(dentry
, inode
);
863 misc
= i_binfmt_misc(inode
);
864 write_lock(&misc
->entries_lock
);
865 list_add(&e
->list
, &misc
->entries
);
866 write_unlock(&misc
->entries_lock
);
872 inode_unlock(d_inode(root
));
883 static const struct file_operations bm_register_operations
= {
884 .write
= bm_register_write
,
885 .llseek
= noop_llseek
,
891 bm_status_read(struct file
*file
, char __user
*buf
, size_t nbytes
, loff_t
*ppos
)
893 struct binfmt_misc
*misc
;
896 misc
= i_binfmt_misc(file_inode(file
));
897 s
= misc
->enabled
? "enabled\n" : "disabled\n";
898 return simple_read_from_buffer(buf
, nbytes
, ppos
, s
, strlen(s
));
901 static ssize_t
bm_status_write(struct file
*file
, const char __user
*buffer
,
902 size_t count
, loff_t
*ppos
)
904 struct binfmt_misc
*misc
;
905 int res
= parse_command(buffer
, count
);
909 misc
= i_binfmt_misc(file_inode(file
));
912 /* Disable all handlers. */
913 misc
->enabled
= false;
916 /* Enable all handlers. */
917 misc
->enabled
= true;
920 /* Delete all handlers. */
921 inode
= d_inode(file_inode(file
)->i_sb
->s_root
);
925 * In order to add new element or remove elements from the list
926 * via bm_{entry,register,status}_write() inode_lock() on the
927 * root inode must be held.
928 * The lock is exclusive ensuring that the list can't be
929 * modified. Only load_misc_binary() can access but does so
930 * read-only. So we only need to take the write lock when we
931 * actually remove the entry from the list.
933 list_for_each_entry_safe(e
, next
, &misc
->entries
, list
)
934 remove_binfmt_handler(misc
, e
);
945 static const struct file_operations bm_status_operations
= {
946 .read
= bm_status_read
,
947 .write
= bm_status_write
,
948 .llseek
= default_llseek
,
951 /* Superblock handling */
953 static void bm_put_super(struct super_block
*sb
)
955 struct user_namespace
*user_ns
= sb
->s_fs_info
;
957 sb
->s_fs_info
= NULL
;
958 put_user_ns(user_ns
);
961 static const struct super_operations s_ops
= {
962 .statfs
= simple_statfs
,
963 .evict_inode
= bm_evict_inode
,
964 .put_super
= bm_put_super
,
967 static int bm_fill_super(struct super_block
*sb
, struct fs_context
*fc
)
970 struct user_namespace
*user_ns
= sb
->s_user_ns
;
971 struct binfmt_misc
*misc
;
972 static const struct tree_descr bm_files
[] = {
973 [2] = {"status", &bm_status_operations
, S_IWUSR
|S_IRUGO
},
974 [3] = {"register", &bm_register_operations
, S_IWUSR
},
978 if (WARN_ON(user_ns
!= current_user_ns()))
982 * Lazily allocate a new binfmt_misc instance for this namespace, i.e.
983 * do it here during the first mount of binfmt_misc. We don't need to
984 * waste memory for every user namespace allocation. It's likely much
985 * more common to not mount a separate binfmt_misc instance than it is
988 * While multiple superblocks can exist they are keyed by userns in
989 * s_fs_info for binfmt_misc. Hence, the vfs guarantees that
990 * bm_fill_super() is called exactly once whenever a binfmt_misc
991 * superblock for a userns is created. This in turn lets us conclude
992 * that when a binfmt_misc superblock is created for the first time for
993 * a userns there's no one racing us. Therefore we don't need any
994 * barriers when we dereference binfmt_misc.
996 misc
= user_ns
->binfmt_misc
;
999 * If it turns out that most user namespaces actually want to
1000 * register their own binary type handler and therefore all
1001 * create their own separate binfmt_misc mounts we should
1002 * consider turning this into a kmem cache.
1004 misc
= kzalloc(sizeof(struct binfmt_misc
), GFP_KERNEL
);
1008 INIT_LIST_HEAD(&misc
->entries
);
1009 rwlock_init(&misc
->entries_lock
);
1011 /* Pairs with smp_load_acquire() in load_binfmt_misc(). */
1012 smp_store_release(&user_ns
->binfmt_misc
, misc
);
1016 * When the binfmt_misc superblock for this userns is shut down
1017 * ->enabled might have been set to false and we don't reinitialize
1018 * ->enabled again in put_super() as someone might already be mounting
1019 * binfmt_misc again. It also would be pointless since by the time
1020 * ->put_super() is called we know that the binary type list for this
1021 * binfmt_misc mount is empty making load_misc_binary() return
1022 * -ENOEXEC independent of whether ->enabled is true. Instead, if
1023 * someone mounts binfmt_misc for the first time or again we simply
1024 * reset ->enabled to true.
1026 misc
->enabled
= true;
1028 err
= simple_fill_super(sb
, BINFMTFS_MAGIC
, bm_files
);
1034 static void bm_free(struct fs_context
*fc
)
1037 put_user_ns(fc
->s_fs_info
);
1040 static int bm_get_tree(struct fs_context
*fc
)
1042 return get_tree_keyed(fc
, bm_fill_super
, get_user_ns(fc
->user_ns
));
1045 static const struct fs_context_operations bm_context_ops
= {
1047 .get_tree
= bm_get_tree
,
1050 static int bm_init_fs_context(struct fs_context
*fc
)
1052 fc
->ops
= &bm_context_ops
;
1056 static struct linux_binfmt misc_format
= {
1057 .module
= THIS_MODULE
,
1058 .load_binary
= load_misc_binary
,
1061 static struct file_system_type bm_fs_type
= {
1062 .owner
= THIS_MODULE
,
1063 .name
= "binfmt_misc",
1064 .init_fs_context
= bm_init_fs_context
,
1065 .fs_flags
= FS_USERNS_MOUNT
,
1066 .kill_sb
= kill_litter_super
,
1068 MODULE_ALIAS_FS("binfmt_misc");
1070 static int __init
init_misc_binfmt(void)
1072 int err
= register_filesystem(&bm_fs_type
);
1074 insert_binfmt(&misc_format
);
1078 static void __exit
exit_misc_binfmt(void)
1080 unregister_binfmt(&misc_format
);
1081 unregister_filesystem(&bm_fs_type
);
1084 core_initcall(init_misc_binfmt
);
1085 module_exit(exit_misc_binfmt
);
1086 MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
1087 MODULE_LICENSE("GPL");