1 // SPDX-License-Identifier: GPL-2.0
3 * The base64 encode/decode code was copied from fscrypt:
4 * Copyright (C) 2015, Google, Inc.
5 * Copyright (C) 2015, Motorola Mobility
6 * Written by Uday Savagaonkar, 2014.
7 * Modified by Jaegeuk Kim, 2015.
9 #include <linux/ceph/ceph_debug.h>
10 #include <linux/xattr.h>
11 #include <linux/fscrypt.h>
12 #include <linux/ceph/striper.h>
15 #include "mds_client.h"
19 * The base64url encoding used by fscrypt includes the '_' character, which may
20 * cause problems in snapshot names (which can not start with '_'). Thus, we
21 * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
22 * which replaces '-' and '_' by '+' and ','.
static const char base64_table[65] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/**
 * ceph_base64_encode - encode bytes with the '+'/',' base64 alphabet
 * @src: bytes to encode
 * @srclen: number of bytes in @src
 * @dst: output buffer; must hold at least DIV_ROUND_UP(srclen * 4, 3) chars
 *
 * Six bits at a time are taken from the input, most significant first, and
 * mapped through base64_table.  No '=' padding is emitted and @dst is not
 * NUL-terminated.
 *
 * Return: the number of characters written to @dst.
 */
int ceph_base64_encode(const u8 *src, int srclen, char *dst)
{
	u32 ac = 0;	/* bit accumulator; stale high bits are shifted out */
	int bits = 0;	/* number of not-yet-emitted bits held in ac */
	int i;
	char *cp = dst;

	for (i = 0; i < srclen; i++) {
		ac = (ac << 8) | src[i];
		bits += 8;
		do {
			bits -= 6;
			*cp++ = base64_table[(ac >> bits) & 0x3f];
		} while (bits >= 6);
	}
	/* Flush a trailing partial group, zero-padded on the right. */
	if (bits)
		*cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
	return cp - dst;
}

/**
 * ceph_base64_decode - decode a string produced by ceph_base64_encode()
 * @src: characters to decode (need not be NUL-terminated)
 * @srclen: number of characters in @src
 * @dst: output buffer; must hold at least (srclen * 3) / 4 bytes
 *
 * Decoding is strict: a character outside base64_table (including an
 * embedded NUL) or non-zero leftover padding bits make the call fail.
 *
 * Return: the number of bytes written to @dst, or a negative value if @src
 * is not a valid encoding.
 * NOTE(review): the exact failure value is not visible in this listing; -1
 * is used here - confirm against the callers' expectations.
 */
int ceph_base64_decode(const char *src, int srclen, u8 *dst)
{
	u32 ac = 0;	/* bit accumulator */
	int bits = 0;	/* number of not-yet-consumed bits held in ac */
	int i;
	u8 *bp = dst;

	for (i = 0; i < srclen; i++) {
		const char *p = strchr(base64_table, src[i]);

		/* strchr() also matches the table's terminating NUL. */
		if (p == NULL || src[i] == 0)
			return -1;
		ac = (ac << 6) | (p - base64_table);
		bits += 6;
		if (bits >= 8) {
			bits -= 8;
			*bp++ = (u8)(ac >> bits);
		}
	}
	/* Leftover bits are encoder padding and must all be zero. */
	if (ac & ((1 << bits) - 1))
		return -1;
	return bp - dst;
}
/*
 * ceph_crypt_get_context - copy an inode's fscrypt context into @ctx.
 *
 * Reads the ceph_fscrypt_auth blob attached to the ceph inode
 * (ci->fscrypt_auth).  The blob is only used when it exists, is longer than
 * the fixed header that precedes cfa_blob, and carries the version
 * CEPH_FSCRYPT_AUTH_VERSION.  The context length is taken from the
 * little-endian cfa_blob_len field and that many bytes of cfa_blob are
 * copied into @ctx.
 *
 * NOTE(review): this listing is incomplete.  The declaration of ctxlen, the
 * statements executed when either check fails, any comparison of ctxlen
 * against @len, and the final return are not visible - confirm them against
 * the full source before relying on this description.
 */
71 static int ceph_crypt_get_context(struct inode
*inode
, void *ctx
, size_t len
)
73 struct ceph_inode_info
*ci
= ceph_inode(inode
);
74 struct ceph_fscrypt_auth
*cfa
= (struct ceph_fscrypt_auth
*)ci
->fscrypt_auth
;
77 /* Non existent or too short? */
78 if (!cfa
|| (ci
->fscrypt_auth_len
< (offsetof(struct ceph_fscrypt_auth
, cfa_blob
) + 1)))
81 /* Some format we don't recognize? */
82 if (le32_to_cpu(cfa
->cfa_version
) != CEPH_FSCRYPT_AUTH_VERSION
)
/* Blob length is stored little-endian in the auth header. */
85 ctxlen
= le32_to_cpu(cfa
->cfa_blob_len
);
89 memcpy(ctx
, cfa
->cfa_blob
, ctxlen
);
/*
 * ceph_crypt_set_context - store a new fscrypt context on @inode.
 *
 * Builds a zero-initialised struct ceph_fscrypt_auth holding the version
 * CEPH_FSCRYPT_AUTH_VERSION, the blob length @len (both little-endian) and a
 * copy of @ctx, hands it to __ceph_setattr() through the fscrypt_auth member
 * of a struct ceph_iattr, marks the inode S_ENCRYPTED with inode_set_flags()
 * and frees the temporary buffer.
 *
 * @fs_data is not used; a non-NULL value triggers WARN_ON_ONCE().
 * @len is compared against FSCRYPT_SET_CONTEXT_MAX_SIZE before anything is
 * allocated.
 *
 * NOTE(review): this listing is incomplete.  The declaration of ret, the
 * statements taken when the length check or the allocation fails, whether
 * inode_set_flags() is conditional on ret, and the return statement are not
 * visible - confirm against the full source.
 */
93 static int ceph_crypt_set_context(struct inode
*inode
, const void *ctx
,
94 size_t len
, void *fs_data
)
97 struct iattr attr
= { };
98 struct ceph_iattr cia
= { };
99 struct ceph_fscrypt_auth
*cfa
;
101 WARN_ON_ONCE(fs_data
);
103 if (len
> FSCRYPT_SET_CONTEXT_MAX_SIZE
)
106 cfa
= kzalloc(sizeof(*cfa
), GFP_KERNEL
);
110 cfa
->cfa_version
= cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION
);
111 cfa
->cfa_blob_len
= cpu_to_le32(len
);
112 memcpy(cfa
->cfa_blob
, ctx
, len
);
/* The setattr call receives the new auth blob via the ceph_iattr. */
114 cia
.fscrypt_auth
= cfa
;
116 ret
= __ceph_setattr(&nop_mnt_idmap
, inode
, &attr
, &cia
);
118 inode_set_flags(inode
, S_ENCRYPTED
, S_ENCRYPTED
);
/* Freed through cia rather than cfa; presumably __ceph_setattr() may take or clear the pointer - confirm. */
119 kfree(cia
.fscrypt_auth
);
123 static bool ceph_crypt_empty_dir(struct inode
*inode
)
125 struct ceph_inode_info
*ci
= ceph_inode(inode
);
127 return ci
->i_rsubdirs
+ ci
->i_rfiles
== 1;
130 static const union fscrypt_policy
*ceph_get_dummy_policy(struct super_block
*sb
)
132 return ceph_sb_to_fs_client(sb
)->fsc_dummy_enc_policy
.policy
;
135 static struct fscrypt_operations ceph_fscrypt_ops
= {
136 .needs_bounce_pages
= 1,
137 .get_context
= ceph_crypt_get_context
,
138 .set_context
= ceph_crypt_set_context
,
139 .get_dummy_policy
= ceph_get_dummy_policy
,
140 .empty_dir
= ceph_crypt_empty_dir
,
143 void ceph_fscrypt_set_ops(struct super_block
*sb
)
145 fscrypt_set_ops(sb
, &ceph_fscrypt_ops
);
148 void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client
*fsc
)
150 fscrypt_free_dummy_policy(&fsc
->fsc_dummy_enc_policy
);
/*
 * ceph_fscrypt_prepare_context - set up fscrypt state for a new inode.
 * @dir:   directory the new inode is being created in
 * @inode: the new inode
 * @as:    security/ACL context that will carry the fscrypt auth blob
 *
 * Visible steps:
 *  - fscrypt_prepare_new_inode() is called and reports, through the local
 *    'encrypted', whether the new inode is to be encrypted;
 *  - a zeroed auth blob is allocated into as->fscrypt_auth and its cfa_blob
 *    is filled by fscrypt_context_for_new_inode();
 *  - the blob's version and length fields are stored little-endian;
 *  - a copy of the blob (kmemdup) is attached to the ceph inode as
 *    ci->fscrypt_auth / ci->fscrypt_auth_len, after warning if one was
 *    already present and freeing it;
 *  - S_ENCRYPTED is set in inode->i_flags.
 *
 * NOTE(review): this listing is incomplete.  The declarations of ret and
 * ctxsize, the handling of errors and of the "not encrypted" case, the
 * remaining arguments of fscrypt_context_for_new_inode() and kmemdup(), and
 * the return value are not visible - confirm against the full source.
 */
153 int ceph_fscrypt_prepare_context(struct inode
*dir
, struct inode
*inode
,
154 struct ceph_acl_sec_ctx
*as
)
157 bool encrypted
= false;
158 struct ceph_inode_info
*ci
= ceph_inode(inode
);
160 ret
= fscrypt_prepare_new_inode(dir
, inode
, &encrypted
);
166 as
->fscrypt_auth
= kzalloc(sizeof(*as
->fscrypt_auth
), GFP_KERNEL
);
167 if (!as
->fscrypt_auth
)
170 ctxsize
= fscrypt_context_for_new_inode(as
->fscrypt_auth
->cfa_blob
,
175 as
->fscrypt_auth
->cfa_version
= cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION
);
176 as
->fscrypt_auth
->cfa_blob_len
= cpu_to_le32(ctxsize
);
/* A new inode is not expected to carry an auth blob yet. */
178 WARN_ON_ONCE(ci
->fscrypt_auth
);
179 kfree(ci
->fscrypt_auth
);
180 ci
->fscrypt_auth_len
= ceph_fscrypt_auth_len(as
->fscrypt_auth
);
181 ci
->fscrypt_auth
= kmemdup(as
->fscrypt_auth
, ci
->fscrypt_auth_len
,
183 if (!ci
->fscrypt_auth
)
186 inode
->i_flags
|= S_ENCRYPTED
;
191 void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request
*req
,
192 struct ceph_acl_sec_ctx
*as
)
194 swap(req
->r_fscrypt_auth
, as
->fscrypt_auth
);
198 * User-created snapshots can't start with '_'. Snapshots that start with this
199 * character are special (hint: they aren't real snapshots) and use the
202 * _<SNAPSHOT-NAME>_<INODE-NUMBER>
205 * - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted,
206 * - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot
208 * This function parses these snapshot names and returns the inode
209 * <INODE-NUMBER>. 'name_len' will also be set with the <SNAPSHOT-NAME>
/*
 * parse_longname - split a "_<name>_<ino>" snapshot name and find its inode.
 * @parent:   inode whose superblock and client are used for the lookup
 * @name:     the long snapshot name
 * @name_len: in: length of @name; out: distance from the (adjusted) start of
 *            @name to the last '_'
 *
 * Visible steps:
 *  - the last '_' in @name is located with strrchr();
 *  - *name_len is set to the offset of that '_' and must be positive;
 *  - the text after that '_' is duplicated with kmemdup_nul() and parsed as
 *    a base-10 u64 into vino.ino (vino.snap is CEPH_NOSNAP);
 *  - the inode is looked up with ceph_find_inode() and, per the in-line
 *    comment, ceph_get_inode() is used when cephfs is not mounted on the
 *    root.
 *
 * Failure paths visible here return ERR_PTR(-EIO) for an unparsable name and
 * ERR_PTR(-ENOMEM) when the inode-number copy cannot be allocated.
 *
 * NOTE(review): this listing is incomplete.  The declarations of
 * inode_number, name_end and ret, the statement that skips the leading '_',
 * the conditions guarding each error path, the release of inode_number and
 * the final return are not visible - confirm against the full source.
 */
212 static struct inode
*parse_longname(const struct inode
*parent
,
213 const char *name
, int *name_len
)
215 struct ceph_client
*cl
= ceph_inode_to_client(parent
);
216 struct inode
*dir
= NULL
;
217 struct ceph_vino vino
= { .snap
= CEPH_NOSNAP
};
220 int orig_len
= *name_len
;
223 /* Skip initial '_' */
225 name_end
= strrchr(name
, '_');
227 doutc(cl
, "failed to parse long snapshot name: %s\n", name
);
228 return ERR_PTR(-EIO
);
230 *name_len
= (name_end
- name
);
231 if (*name_len
<= 0) {
232 pr_err_client(cl
, "failed to parse long snapshot name\n");
233 return ERR_PTR(-EIO
);
236 /* Get the inode number */
237 inode_number
= kmemdup_nul(name_end
+ 1,
238 orig_len
- *name_len
- 2,
241 return ERR_PTR(-ENOMEM
);
242 ret
= kstrtou64(inode_number
, 10, &vino
.ino
);
244 doutc(cl
, "failed to parse inode number: %s\n", name
);
249 /* And finally the inode */
250 dir
= ceph_find_inode(parent
->i_sb
, vino
);
252 /* This can happen if we're not mounting cephfs on the root */
253 dir
= ceph_get_inode(parent
->i_sb
, vino
, NULL
);
255 doutc(cl
, "can't find inode %s (%s)\n", inode_number
, name
);
/*
 * ceph_encode_encrypted_dname - produce the encoded form of a dentry name.
 * @parent: directory the name lives in
 * @d_name: cleartext name
 * (a destination buffer 'buf' is also used; its declaration is not visible)
 *
 * Visible steps:
 *  - in a snapshot directory (ceph_snap() == CEPH_SNAPDIR) a name starting
 *    with '_' is split by parse_longname(), which also yields the inode whose
 *    key is to be used ('dir' then differs from @parent);
 *  - when 'dir' has no encryption key the original name is copied to buf
 *    unchanged;
 *  - otherwise the name is encrypted with fscrypt_fname_encrypt(); if the
 *    ciphertext is longer than CEPH_NOHASH_NAME_MAX, the bytes beyond that
 *    point are replaced by their SHA-256 digest;
 *  - the result is encoded with ceph_base64_encode() into buf;
 *  - for a long snapshot name the output is rewritten as "_<encoded>_<ino>";
 *  - an inode obtained from parse_longname() that is still I_NEW is
 *    discarded.
 *
 * -ENAMETOOLONG is stored in elen when fscrypt_fname_encrypted_size() rejects
 * the name.
 *
 * NOTE(review): this listing is incomplete.  The rest of the signature, the
 * local declarations, all error handling and gotos, the kmalloc() flags, the
 * cleanup of cryptbuf and the return value are not visible - confirm against
 * the full source.
 */
263 int ceph_encode_encrypted_dname(struct inode
*parent
, struct qstr
*d_name
,
266 struct ceph_client
*cl
= ceph_inode_to_client(parent
);
267 struct inode
*dir
= parent
;
275 iname
.name
= d_name
->name
;
276 name_len
= d_name
->len
;
278 /* Handle the special case of snapshot names that start with '_' */
279 if ((ceph_snap(dir
) == CEPH_SNAPDIR
) && (name_len
> 0) &&
280 (iname
.name
[0] == '_')) {
281 dir
= parse_longname(parent
, iname
.name
, &name_len
);
284 iname
.name
++; /* skip initial '_' */
286 iname
.len
= name_len
;
288 if (!fscrypt_has_encryption_key(dir
)) {
289 memcpy(buf
, d_name
->name
, d_name
->len
);
295 * Convert cleartext d_name to ciphertext. If result is longer than
296 * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes
298 * See: fscrypt_setup_filename
300 if (!fscrypt_fname_encrypted_size(dir
, iname
.len
, NAME_MAX
, &len
)) {
301 elen
= -ENAMETOOLONG
;
305 /* Allocate a buffer appropriate to hold the result */
306 cryptbuf
= kmalloc(len
> CEPH_NOHASH_NAME_MAX
? NAME_MAX
: len
,
313 ret
= fscrypt_fname_encrypt(dir
, &iname
, cryptbuf
, len
);
319 /* hash the end if the name is long enough */
320 if (len
> CEPH_NOHASH_NAME_MAX
) {
321 u8 hash
[SHA256_DIGEST_SIZE
];
322 u8
*extra
= cryptbuf
+ CEPH_NOHASH_NAME_MAX
;
325 * hash the extra bytes and overwrite crypttext beyond that
328 sha256(extra
, len
- CEPH_NOHASH_NAME_MAX
, hash
);
329 memcpy(extra
, hash
, SHA256_DIGEST_SIZE
);
330 len
= CEPH_NOHASH_NAME_MAX
+ SHA256_DIGEST_SIZE
;
333 /* base64 encode the encrypted name */
334 elen
= ceph_base64_encode(cryptbuf
, len
, buf
);
335 doutc(cl
, "base64-encoded ciphertext name = %.*s\n", elen
, buf
);
337 /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
339 if ((elen
> 0) && (dir
!= parent
)) {
340 char tmp_buf
[NAME_MAX
];
/*
 * NOTE(review): snprintf() returns the length the output would have had
 * without truncation; if that can exceed sizeof(tmp_buf) the memcpy() below
 * reads past tmp_buf.  Also, i_ino is printed with %ld although it is
 * presumably unsigned - confirm both.
 */
342 elen
= snprintf(tmp_buf
, sizeof(tmp_buf
), "_%.*s_%ld",
343 elen
, buf
, dir
->i_ino
);
344 memcpy(buf
, tmp_buf
, elen
);
350 if ((dir
->i_state
& I_NEW
))
351 discard_new_inode(dir
);
/*
 * ceph_encode_encrypted_fname - encode the name of @dentry for @parent.
 *
 * Thin wrapper that passes &dentry->d_name and the caller's buffer to
 * ceph_encode_encrypted_dname() and returns its result.  Warns once if
 * @parent has no encryption key loaded.
 *
 * NOTE(review): the last parameter of the signature (the 'buf' used below)
 * is not visible in this listing.
 */
358 int ceph_encode_encrypted_fname(struct inode
*parent
, struct dentry
*dentry
,
361 WARN_ON_ONCE(!fscrypt_has_encryption_key(parent
));
363 return ceph_encode_encrypted_dname(parent
, &dentry
->d_name
, buf
);
367 * ceph_fname_to_usr - convert a filename for userland presentation
368 * @fname: ceph_fname to be converted
369 * @tname: temporary name buffer to use for conversion (may be NULL)
370 * @oname: where converted name should be placed
371 * @is_nokey: set to true if key wasn't available during conversion (may be NULL)
373 * Given a filename (usually from the MDS), format it for presentation to
374 * userland. If the parent directory (@fname->dir) is not encrypted, just pass it back as-is.
376 * Otherwise, base64 decode the string, and then ask fscrypt to format it
377 * for userland presentation.
379 * Returns 0 on success or negative error code on error.
/*
 * Implementation notes for ceph_fname_to_usr(), based on the statements
 * visible in this listing:
 *  - names or ciphertexts longer than NAME_MAX are rejected up front;
 *  - in a snapshot directory a long "_<name>_<ino>" name is split by
 *    parse_longname(), which replaces 'dir' with the inode named in it;
 *  - if 'dir' is not encrypted, @oname is pointed at the original name;
 *  - ceph_fscrypt_prepare_readdir() is called to try to load the key; when
 *    no key is available the raw name from @fname is passed back instead of
 *    a fscrypt no-key name;
 *  - when @fname carries no ciphertext (ctext_len == 0) the name is
 *    base64-decoded into @tname (a temporary buffer is allocated into
 *    _tname, presumably when the caller passed no @tname - confirm);
 *    otherwise fname->ctext is used directly;
 *  - fscrypt_fname_disk_to_usr() produces the cleartext in @oname;
 *  - for a long snapshot name the result is re-wrapped as "_<name>_<ino>";
 *  - the temporary buffer is freed and an I_NEW inode obtained from
 *    parse_longname() is discarded.
 *
 * NOTE(review): this listing is incomplete.  The declarations of ret and
 * declen, every error check, the branches selecting between the visible
 * alternatives, the handling of @is_nokey and the return statement are not
 * visible - confirm against the full source.
 */
381 int ceph_fname_to_usr(const struct ceph_fname
*fname
, struct fscrypt_str
*tname
,
382 struct fscrypt_str
*oname
, bool *is_nokey
)
384 struct inode
*dir
= fname
->dir
;
385 struct fscrypt_str _tname
= FSTR_INIT(NULL
, 0);
386 struct fscrypt_str iname
;
387 char *name
= fname
->name
;
388 int name_len
= fname
->name_len
;
391 /* Sanity check that the resulting name will fit in the buffer */
392 if (fname
->name_len
> NAME_MAX
|| fname
->ctext_len
> NAME_MAX
)
395 /* Handle the special case of snapshot names that start with '_' */
396 if ((ceph_snap(dir
) == CEPH_SNAPDIR
) && (name_len
> 0) &&
398 dir
= parse_longname(dir
, name
, &name_len
);
401 name
++; /* skip initial '_' */
404 if (!IS_ENCRYPTED(dir
)) {
405 oname
->name
= fname
->name
;
406 oname
->len
= fname
->name_len
;
411 ret
= ceph_fscrypt_prepare_readdir(dir
);
416 * Use the raw dentry name as sent by the MDS instead of
417 * generating a nokey name via fscrypt.
419 if (!fscrypt_has_encryption_key(dir
)) {
421 oname
->name
= fname
->name
;
423 memcpy(oname
->name
, fname
->name
, fname
->name_len
);
424 oname
->len
= fname
->name_len
;
431 if (fname
->ctext_len
== 0) {
435 ret
= fscrypt_fname_alloc_buffer(NAME_MAX
, &_tname
);
441 declen
= ceph_base64_decode(name
, name_len
, tname
->name
);
446 iname
.name
= tname
->name
;
449 iname
.name
= fname
->ctext
;
450 iname
.len
= fname
->ctext_len
;
453 ret
= fscrypt_fname_disk_to_usr(dir
, 0, 0, &iname
, oname
);
454 if (!ret
&& (dir
!= fname
->dir
)) {
455 char tmp_buf
[CEPH_BASE64_CHARS(NAME_MAX
)];
/*
 * NOTE(review): snprintf() returns the untruncated length; if it can exceed
 * sizeof(tmp_buf), or the capacity behind oname->name, the memcpy() below
 * overruns - confirm the buffer sizes rule this out.
 */
457 name_len
= snprintf(tmp_buf
, sizeof(tmp_buf
), "_%.*s_%ld",
458 oname
->len
, oname
->name
, dir
->i_ino
);
459 memcpy(oname
->name
, tmp_buf
, name_len
);
460 oname
->len
= name_len
;
464 fscrypt_fname_free_buffer(&_tname
);
466 if (dir
!= fname
->dir
) {
467 if ((dir
->i_state
& I_NEW
))
468 discard_new_inode(dir
);
476 * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper
477 * @dir: directory inode for readdir prep
479 * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as
480 * non-complete if this call results in having the directory unlocked.
483 * 1 - if directory was locked and key is now loaded (i.e. dir is unlocked)
484 * 0 - if directory is still locked
485 * < 0 - if __fscrypt_prepare_readdir() fails
/*
 * Implementation notes: whether a key was loaded is sampled before calling
 * __fscrypt_prepare_readdir(); if the key was absent before and is present
 * afterwards, the directory's "complete" state is cleared with
 * ceph_dir_clear_complete().  Unencrypted directories are filtered out by
 * the IS_ENCRYPTED() check before the fscrypt call.
 *
 * NOTE(review): the declaration of err, the check of err and every return
 * statement are not visible in this listing - confirm against the full
 * source.
 */
487 int ceph_fscrypt_prepare_readdir(struct inode
*dir
)
489 bool had_key
= fscrypt_has_encryption_key(dir
);
492 if (!IS_ENCRYPTED(dir
))
495 err
= __fscrypt_prepare_readdir(dir
);
498 if (!had_key
&& fscrypt_has_encryption_key(dir
)) {
499 /* directory just got unlocked, mark it as not complete */
500 ceph_dir_clear_complete(dir
);
506 int ceph_fscrypt_decrypt_block_inplace(const struct inode
*inode
,
507 struct page
*page
, unsigned int len
,
508 unsigned int offs
, u64 lblk_num
)
510 struct ceph_client
*cl
= ceph_inode_to_client(inode
);
512 doutc(cl
, "%p %llx.%llx len %u offs %u blk %llu\n", inode
,
513 ceph_vinop(inode
), len
, offs
, lblk_num
);
514 return fscrypt_decrypt_block_inplace(inode
, page
, len
, offs
, lblk_num
);
/*
 * ceph_fscrypt_encrypt_block_inplace - encrypt one block inside a page.
 *
 * Logs inode, length, page offset and logical block number through doutc()
 * and forwards the request to fscrypt_encrypt_block_inplace(), returning its
 * result.
 *
 * NOTE(review): the final parameter of the signature and the final argument
 * of the fscrypt call are not visible in this listing - confirm against the
 * full source.
 */
517 int ceph_fscrypt_encrypt_block_inplace(const struct inode
*inode
,
518 struct page
*page
, unsigned int len
,
519 unsigned int offs
, u64 lblk_num
,
522 struct ceph_client
*cl
= ceph_inode_to_client(inode
);
524 doutc(cl
, "%p %llx.%llx len %u offs %u blk %llu\n", inode
,
525 ceph_vinop(inode
), len
, offs
, lblk_num
);
526 return fscrypt_encrypt_block_inplace(inode
, page
, len
, offs
, lblk_num
,
531 * ceph_fscrypt_decrypt_pages - decrypt an array of pages
532 * @inode: pointer to inode associated with these pages
533 * @page: pointer to page array
534 * @off: offset into the file that the read data starts
535 * @len: max length to decrypt
537 * Decrypt an array of fscrypt'ed pages and return the amount of
538 * data decrypted. Any data in the page prior to the start of the
539 * first complete block in the read is ignored. Any incomplete
540 * crypto blocks at the end of the array are ignored (and should
541 * probably be zeroed by the caller).
543 * Returns the length of the decrypted data or a negative errno.
/*
 * Implementation notes, based on the statements visible in this listing:
 *  - baseblk is the crypto block index of file offset 'off'
 *    (off >> CEPH_FSCRYPT_BLOCK_SHIFT);
 *  - 'len' is masked with CEPH_FSCRYPT_BLOCK_MASK before the number of
 *    blocks is computed by ceph_fscrypt_blocks(), so a trailing partial
 *    block is not processed;
 *  - for block i the byte offset into the page array is
 *    i << CEPH_FSCRYPT_BLOCK_SHIFT, from which the page index and the
 *    in-page offset are derived;
 *  - each block is decrypted with ceph_fscrypt_decrypt_block_inplace() and
 *    ret grows by CEPH_FSCRYPT_BLOCK_SIZE per block.
 *
 * NOTE(review): the rest of the signature, the declarations of i, ret, fret
 * and num_blocks, the last argument of the per-block call (presumably
 * baseblk + i), the handling of a failing block and the return statement are
 * not visible - confirm against the full source.
 */
545 int ceph_fscrypt_decrypt_pages(struct inode
*inode
, struct page
**page
,
549 u64 baseblk
= off
>> CEPH_FSCRYPT_BLOCK_SHIFT
;
553 * We can't deal with partial blocks on an encrypted file, so mask off
556 num_blocks
= ceph_fscrypt_blocks(off
, len
& CEPH_FSCRYPT_BLOCK_MASK
);
558 /* Decrypt each block */
559 for (i
= 0; i
< num_blocks
; ++i
) {
560 int blkoff
= i
<< CEPH_FSCRYPT_BLOCK_SHIFT
;
561 int pgidx
= blkoff
>> PAGE_SHIFT
;
562 unsigned int pgoffs
= offset_in_page(blkoff
);
565 fret
= ceph_fscrypt_decrypt_block_inplace(inode
, page
[pgidx
],
566 CEPH_FSCRYPT_BLOCK_SIZE
, pgoffs
,
573 ret
+= CEPH_FSCRYPT_BLOCK_SIZE
;
579 * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
580 * @inode: inode associated with pages being decrypted
581 * @page: pointer to page array
582 * @off: offset into the file that the data in page[0] starts
583 * @map: pointer to extent array
584 * @ext_cnt: length of extent array
586 * Given an extent map and a page array, decrypt the received data in-place,
587 * skipping holes. Returns the offset into buffer of end of last decrypted
/*
 * Implementation notes, based on the statements visible in this listing:
 *  - an empty extent array is logged and, per the log message, yields 0;
 *  - ceph_calc_file_object_mapping() translates file offset 'off' into an
 *    object number and an offset within that object (objoff);
 *  - for every extent, its position in the page array is ext->off - objoff;
 *    the page index is that value shifted by PAGE_SHIFT;
 *  - an extent whose offset or length has bits outside
 *    CEPH_FSCRYPT_BLOCK_MASK is reported as a bad encrypted sparse extent;
 *  - each extent is decrypted with ceph_fscrypt_decrypt_pages() starting at
 *    &page[pgidx] and file offset off + pgsoff.
 *
 * NOTE(review): the rest of the signature (ext_cnt), the declarations of i,
 * ret, fret, objno, objoff and xlen, the handling of a bad extent or a
 * failing decrypt, how ret is accumulated and the return statement are not
 * visible - confirm against the full source.
 */
590 int ceph_fscrypt_decrypt_extents(struct inode
*inode
, struct page
**page
,
591 u64 off
, struct ceph_sparse_extent
*map
,
594 struct ceph_client
*cl
= ceph_inode_to_client(inode
);
596 struct ceph_inode_info
*ci
= ceph_inode(inode
);
600 /* Nothing to do for empty array */
602 doutc(cl
, "%p %llx.%llx empty array, ret 0\n", inode
,
607 ceph_calc_file_object_mapping(&ci
->i_layout
, off
, map
[0].len
,
608 &objno
, &objoff
, &xlen
);
610 for (i
= 0; i
< ext_cnt
; ++i
) {
611 struct ceph_sparse_extent
*ext
= &map
[i
];
612 int pgsoff
= ext
->off
- objoff
;
613 int pgidx
= pgsoff
>> PAGE_SHIFT
;
616 if ((ext
->off
| ext
->len
) & ~CEPH_FSCRYPT_BLOCK_MASK
) {
618 "%p %llx.%llx bad encrypted sparse extent "
619 "idx %d off %llx len %llx\n",
620 inode
, ceph_vinop(inode
), i
, ext
->off
,
624 fret
= ceph_fscrypt_decrypt_pages(inode
, &page
[pgidx
],
625 off
+ pgsoff
, ext
->len
);
626 doutc(cl
, "%p %llx.%llx [%d] 0x%llx~0x%llx fret %d\n", inode
,
627 ceph_vinop(inode
), i
, ext
->off
, ext
->len
, fret
);
635 doutc(cl
, "ret %d\n", ret
);
640 * ceph_fscrypt_encrypt_pages - encrypt an array of pages
641 * @inode: pointer to inode associated with these pages
642 * @page: pointer to page array
643 * @off: offset into the file that the data starts
644 * @len: max length to encrypt
645 * @gfp: gfp flags to use for allocation
647 * Encrypt an array of cleartext pages and return the amount of
648 * data encrypted. Any data in the page prior to the start of the
649 * first complete block in the read is ignored. Any incomplete
650 * crypto blocks at the end of the array are ignored.
652 * Returns the length of the encrypted data or a negative errno.
654 int ceph_fscrypt_encrypt_pages(struct inode
*inode
, struct page
**page
, u64 off
,
658 u64 baseblk
= off
>> CEPH_FSCRYPT_BLOCK_SHIFT
;
662 * We can't deal with partial blocks on an encrypted file, so mask off
665 num_blocks
= ceph_fscrypt_blocks(off
, len
& CEPH_FSCRYPT_BLOCK_MASK
);
667 /* Encrypt each block */
668 for (i
= 0; i
< num_blocks
; ++i
) {
669 int blkoff
= i
<< CEPH_FSCRYPT_BLOCK_SHIFT
;
670 int pgidx
= blkoff
>> PAGE_SHIFT
;
671 unsigned int pgoffs
= offset_in_page(blkoff
);
674 fret
= ceph_fscrypt_encrypt_block_inplace(inode
, page
[pgidx
],
675 CEPH_FSCRYPT_BLOCK_SIZE
, pgoffs
,
682 ret
+= CEPH_FSCRYPT_BLOCK_SIZE
;