// SPDX-License-Identifier: GPL-2.0
/*
 * fs/btrfs/send.c (Linux 4.19.133)
 *
 * Copyright (C) 2012 Alexander Block.  All rights reserved.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/compat.h>
#include <linux/crc32c.h>

#include "send.h"
#include "backref.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
#include "xattr.h"

/*
 * Maximum number of references an extent can have in order for us to attempt
 * to issue clone operations instead of write operations. This currently
 * exists to avoid hitting limitations of the backreference walking code
 * (taking a lot of time and using too much memory for extents with a large
 * number of references).
 */
#define SEND_MAX_EXTENT_REFS	64

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and a higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
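
/*
 * Example of how a reversed fs_path is filled: get_cur_path() walks from an
 * inode up to the subvolume root, so name components arrive leaf-first and
 * are prepended at p->start, with the string always NUL-terminated at the
 * end of the buffer:
 *
 *	fs_path_add(p, "c", 1);		contents: "c"
 *	fs_path_add(p, "b", 1);		contents: "b/c"
 *	fs_path_add(p, "a", 1);		contents: "a/b/c"
 *	fs_path_unreverse(p);		moves the string to p->buf
 *
 * With reversed == 0, components are appended at p->end instead.
 */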

/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * info about the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;
	u64 cur_inode_next_write_offset;
	bool ignore_cur_inode;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with the lower inode number when we finish processing it -
	 * we must process the directory with the higher inode number first,
	 * then rename/move it and then rename/move the directory with the
	 * lower inode number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that led to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has the lower inode number, but we can't move it (2nd mv
	 * operation) before we move "d", which has the higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that led to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
	u64 last_dir_index_offset;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};

__cold
static void inconsistent_snapshot_error(struct send_ctx *sctx,
					enum btrfs_compare_tree_result result,
					const char *what)
{
	const char *result_string;

	switch (result) {
	case BTRFS_COMPARE_TREE_NEW:
		result_string = "new";
		break;
	case BTRFS_COMPARE_TREE_DELETED:
		result_string = "deleted";
		break;
	case BTRFS_COMPARE_TREE_CHANGED:
		result_string = "updated";
		break;
	case BTRFS_COMPARE_TREE_SAME:
		ASSERT(0);
		result_string = "unchanged";
		break;
	default:
		ASSERT(0);
		result_string = "unexpected";
	}

	btrfs_err(sctx->send_root->fs_info,
		  "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
		  result_string, what, sctx->cmp_key->objectid,
		  sctx->send_root->root_key.objectid,
		  (sctx->parent_root ?
		   sctx->parent_root->root_key.objectid : 0));
}

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}
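
/*
 * A note on the conditions above: explicit zero writes for holes are only
 * needed on incremental sends (a parent root exists) for regular files that
 * already existed on the receiving side, since the receiver's copy may hold
 * old data in the now-hole range. Newly created files start out empty.
 */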

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf)
		kfree(p->buf);
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}

static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_KERNEL);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}
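
/*
 * In the reversed case above, the path string lives at the end of the old
 * buffer (path_len bytes plus its terminating NUL at buf[old_buf_len - 1]),
 * so after growing the buffer those path_len + 1 bytes are moved so that
 * the string again ends at the last byte of the new, larger buffer.
 */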

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					   struct extent_buffer *eb,
					   unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}

static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}

static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}
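
/*
 * write_buf() below loops because kernel_write() may complete only part of
 * the request; it keeps writing until all len bytes have been pushed to the
 * send file, advancing *off as it goes, and treats a zero-length write as
 * -EIO rather than retrying forever.
 */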
static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	u32 pos = 0;

	while (pos < len) {
		ret = kernel_write(filp, buf + pos, len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			return ret;
		if (ret == 0) {
			return -EIO;
		}
		pos += ret;
	}

	return 0;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
			u##bits attr, u##bits value)			\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}

#define TLV_PUT(sctx, attrtype, data, attrlen) \
	do { \
		ret = tlv_put(sctx, attrtype, data, attrlen); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
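
/*
 * Resulting stream layout (send stream v1): the stream begins with a
 * btrfs_stream_header carrying the magic and version, followed by a sequence
 * of commands. Each command is a btrfs_cmd_header (le32 length of the TLV
 * payload, le16 command id, le32 crc32c of the whole command computed with
 * the crc field zeroed), followed by TLVs: a btrfs_tlv_header (le16 tlv_type,
 * le16 tlv_len) and tlv_len bytes of data each. A rename, for example,
 * carries two TLVs, BTRFS_SEND_A_PATH and BTRFS_SEND_A_PATH_TO.
 */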

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
					&sctx->send_off);
}

/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
					&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		     struct fs_path *from, struct fs_path *to)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_unlink %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rmdir %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			  u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			  u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}

	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}
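
/*
 * Item layout walked above: an INODE_REF item packs one or more entries back
 * to back, each a struct btrfs_inode_ref (index, name_len) immediately
 * followed by name_len bytes of name, with the parent directory taken from
 * the item key's offset. INODE_EXTREF entries store the parent directory
 * inside each entry instead, which is why dir is read per entry there.
 */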

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len >
					BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						GFP_KERNEL | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = kvmalloc(buf_len, GFP_KERNEL);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}

/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->data_offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	struct btrfs_extent_item *ei;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&fs_info->commit_root_sem);
	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&fs_info->commit_root_sem);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
			    struct btrfs_extent_item);
	/*
	 * Backreference walking (iterate_extent_inodes() below) is currently
	 * too expensive when an extent has a large number of references, both
	 * in time spent and used memory. So for now just fallback to write
	 * operations instead of clone operations when an extent has more than
	 * a certain amount of references.
	 */
	if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
		ret = -ENOENT;
		goto out;
	}
	btrfs_release_path(tmp_path);

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
				    extent_item_pos, 1, __iterate_backrefs,
				    backref_ctx, false);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(fs_info,
			  "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
			  ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	btrfs_debug(fs_info,
		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
		    data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		btrfs_debug(fs_info, "no clones found");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}
	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}
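
/*
 * Symlink targets in btrfs are stored as the data of an inline, uncompressed
 * file extent at offset 0 of the symlink inode, which is what read_symlink()
 * below relies on (see the BUG_ON checks on extent type and compression).
 */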
static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * An empty symlink inode. Can happen in rare error paths when
		 * creating a symlink (transaction committed before the inode
		 * eviction handler removed the symlink inode items and a crash
		 * happened in between or the subvolume was snapshotted in
		 * between). Print an informative message to dmesg/syslog so
		 * that the user can delete the symlink.
		 */
		btrfs_err(root->fs_info,
			  "Found empty symlink inode %llu at root %llu",
			  ino, root->root_key.objectid);
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}
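
/*
 * Example: for inode 257 with generation 9 the loop above tries "o257-9-0",
 * then "o257-9-1" and so on, until it finds a name that exists in neither
 * the send root's nor the parent root's top-level directory.
 */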

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};
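
/*
 * Summary of how get_cur_inode_state() below maps its lookups (an inode
 * counts as present on a side only when its generation matches 'gen'):
 *
 *	send root	parent root	ino < send_progress	result
 *	present		present		-			inode_state_no_change
 *	present		missing		yes			inode_state_did_create
 *	present		missing		no			inode_state_will_create
 *	missing		present		yes			inode_state_did_delete
 *	missing		present		no			inode_state_will_delete
 *	missing		missing		-			-ENOENT
 */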
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	if (ino == BTRFS_FIRST_FREE_OBJECTID)
		return 1;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
			dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	if (key.type == BTRFS_ROOT_ITEM_KEY) {
		ret = -ENOENT;
		goto out;
	}
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent
 * dir, the generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;

		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;

		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen, u64 *who_mode)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
			&other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress ||
	    is_waiting_for_move(sctx, other_inode)) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				who_gen, who_mode, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			    u64 dir, u64 dir_gen,
			    u64 ino, u64 ino_gen,
			    const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	if (dir != BTRFS_FIRST_FREE_OBJECTID) {
		ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
			&ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that caused
	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
	 * the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an
 * inode that got overwritten. This is used by process_recorded_refs to
 * determine if it has to use the path as returned by get_cur_path or the
 * orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
			name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This function
 * takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
			(unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
			(unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			  nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}

/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}
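
/*
 * Together, name_cache_used() and name_cache_clean_unused() implement a
 * small LRU: using an entry moves it to the tail of name_cache_list, and
 * once the cache grows to SEND_CTX_NAME_CACHE_CLEAN_SIZE (256) entries,
 * entries are dropped from the head until SEND_CTX_MAX_NAME_CACHE_SIZE
 * (128) remain.
 */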

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode is not existent or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
			dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	return ret;
}
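
/*
 * Note that both outcomes are cached above: nce->ret records whether the
 * stored name was a real ref (0) or an orphan name (1), and a cache hit
 * replays that value, so callers get the same result as the original lookup.
 */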

/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inodes "orphan" name instead of the real name and stop. Same with new inodes
 * that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
 */
2284 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2285 struct fs_path *dest)
2287 int ret = 0;
2288 struct fs_path *name = NULL;
2289 u64 parent_inode = 0;
2290 u64 parent_gen = 0;
2291 int stop = 0;
2293 name = fs_path_alloc();
2294 if (!name) {
2295 ret = -ENOMEM;
2296 goto out;
2299 dest->reversed = 1;
2300 fs_path_reset(dest);
2302 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2303 struct waiting_dir_move *wdm;
2305 fs_path_reset(name);
2307 if (is_waiting_for_rm(sctx, ino)) {
2308 ret = gen_unique_name(sctx, ino, gen, name);
2309 if (ret < 0)
2310 goto out;
2311 ret = fs_path_add_path(dest, name);
2312 break;
2315 wdm = get_waiting_dir_move(sctx, ino);
2316 if (wdm && wdm->orphanized) {
2317 ret = gen_unique_name(sctx, ino, gen, name);
2318 stop = 1;
2319 } else if (wdm) {
2320 ret = get_first_ref(sctx->parent_root, ino,
2321 &parent_inode, &parent_gen, name);
2322 } else {
2323 ret = __get_cur_name_and_parent(sctx, ino, gen,
2324 &parent_inode,
2325 &parent_gen, name);
2326 if (ret)
2327 stop = 1;
2330 if (ret < 0)
2331 goto out;
2333 ret = fs_path_add_path(dest, name);
2334 if (ret < 0)
2335 goto out;
2337 ino = parent_inode;
2338 gen = parent_gen;
2341 out:
2342 fs_path_free(name);
2343 if (!ret)
2344 fs_path_unreverse(dest);
2345 return ret;
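/*
 * For illustration, assume the receiver currently has a/ (ino 257)
 * containing c/ (ino 259) containing file (ino 261). get_cur_path(sctx,
 * 261, gen, dest) then walks upwards via __get_cur_name_and_parent() or
 * get_first_ref(): ("file", parent 259), ("c", parent 257), ("a", parent
 * 256), prepending each component to the reversed fs_path, and stops at
 * BTRFS_FIRST_FREE_OBJECTID. Unreversing yields "a/c/file".
 */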
2349 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2351 static int send_subvol_begin(struct send_ctx *sctx)
2353 int ret;
2354 struct btrfs_root *send_root = sctx->send_root;
2355 struct btrfs_root *parent_root = sctx->parent_root;
2356 struct btrfs_path *path;
2357 struct btrfs_key key;
2358 struct btrfs_root_ref *ref;
2359 struct extent_buffer *leaf;
2360 char *name = NULL;
2361 int namelen;
2363 path = btrfs_alloc_path();
2364 if (!path)
2365 return -ENOMEM;
2367 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
2368 if (!name) {
2369 btrfs_free_path(path);
2370 return -ENOMEM;
2373 key.objectid = send_root->objectid;
2374 key.type = BTRFS_ROOT_BACKREF_KEY;
2375 key.offset = 0;
2377 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2378 &key, path, 1, 0);
2379 if (ret < 0)
2380 goto out;
2381 if (ret) {
2382 ret = -ENOENT;
2383 goto out;
2386 leaf = path->nodes[0];
2387 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2388 if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2389 key.objectid != send_root->objectid) {
2390 ret = -ENOENT;
2391 goto out;
2393 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2394 namelen = btrfs_root_ref_name_len(leaf, ref);
2395 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2396 btrfs_release_path(path);
2398 if (parent_root) {
2399 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2400 if (ret < 0)
2401 goto out;
2402 } else {
2403 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2404 if (ret < 0)
2405 goto out;
2408 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2410 if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
2411 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2412 sctx->send_root->root_item.received_uuid);
2413 else
2414 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2415 sctx->send_root->root_item.uuid);
2417 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2418 le64_to_cpu(sctx->send_root->root_item.ctransid));
2419 if (parent_root) {
2420 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
2421 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2422 parent_root->root_item.received_uuid);
2423 else
2424 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2425 parent_root->root_item.uuid);
2426 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2427 le64_to_cpu(sctx->parent_root->root_item.ctransid));
2430 ret = send_cmd(sctx);
2432 tlv_put_failure:
2433 out:
2434 btrfs_free_path(path);
2435 kfree(name);
2436 return ret;
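/*
 * The resulting stream item for an incremental send looks roughly like
 * this (names, transids and UUIDs are made up for illustration):
 *
 *      BTRFS_SEND_C_SNAPSHOT
 *          BTRFS_SEND_A_PATH           "snap2"
 *          BTRFS_SEND_A_UUID           <uuid of snap2>
 *          BTRFS_SEND_A_CTRANSID       12345
 *          BTRFS_SEND_A_CLONE_UUID     <uuid of snap1, the parent>
 *          BTRFS_SEND_A_CLONE_CTRANSID 12340
 *
 * A full send emits BTRFS_SEND_C_SUBVOL instead and omits the two clone
 * attributes.
 */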
2439 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2441 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2442 int ret = 0;
2443 struct fs_path *p;
2445 btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
2447 p = fs_path_alloc();
2448 if (!p)
2449 return -ENOMEM;
2451 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2452 if (ret < 0)
2453 goto out;
2455 ret = get_cur_path(sctx, ino, gen, p);
2456 if (ret < 0)
2457 goto out;
2458 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2459 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2461 ret = send_cmd(sctx);
2463 tlv_put_failure:
2464 out:
2465 fs_path_free(p);
2466 return ret;
2469 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2471 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2472 int ret = 0;
2473 struct fs_path *p;
2475 btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
2477 p = fs_path_alloc();
2478 if (!p)
2479 return -ENOMEM;
2481 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2482 if (ret < 0)
2483 goto out;
2485 ret = get_cur_path(sctx, ino, gen, p);
2486 if (ret < 0)
2487 goto out;
2488 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2489 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2491 ret = send_cmd(sctx);
2493 tlv_put_failure:
2494 out:
2495 fs_path_free(p);
2496 return ret;
2499 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2501 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2502 int ret = 0;
2503 struct fs_path *p;
2505 btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
2506 ino, uid, gid);
2508 p = fs_path_alloc();
2509 if (!p)
2510 return -ENOMEM;
2512 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2513 if (ret < 0)
2514 goto out;
2516 ret = get_cur_path(sctx, ino, gen, p);
2517 if (ret < 0)
2518 goto out;
2519 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2520 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2521 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2523 ret = send_cmd(sctx);
2525 tlv_put_failure:
2526 out:
2527 fs_path_free(p);
2528 return ret;
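/*
 * send_truncate(), send_chmod() and send_chown() above all follow the
 * same begin_cmd() / get_cur_path() / TLV_PUT_*() / send_cmd() pattern.
 * A minimal sketch of that pattern, kept out of the build, reusing the
 * truncate command and a made-up value as payload:
 */
#if 0
static int send_example_cmd(struct send_ctx *sctx, u64 ino, u64 gen, u64 val)
{
        struct fs_path *p;
        int ret;

        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;

        ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
        if (ret < 0)
                goto out;

        /* Path of the inode as the receiver currently sees it */
        ret = get_cur_path(sctx, ino, gen, p);
        if (ret < 0)
                goto out;
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, val);

        ret = send_cmd(sctx);

tlv_put_failure:
out:
        fs_path_free(p);
        return ret;
}
#endif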
2531 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2533 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2534 int ret = 0;
2535 struct fs_path *p = NULL;
2536 struct btrfs_inode_item *ii;
2537 struct btrfs_path *path = NULL;
2538 struct extent_buffer *eb;
2539 struct btrfs_key key;
2540 int slot;
2542 btrfs_debug(fs_info, "send_utimes %llu", ino);
2544 p = fs_path_alloc();
2545 if (!p)
2546 return -ENOMEM;
2548 path = alloc_path_for_send();
2549 if (!path) {
2550 ret = -ENOMEM;
2551 goto out;
2554 key.objectid = ino;
2555 key.type = BTRFS_INODE_ITEM_KEY;
2556 key.offset = 0;
2557 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2558 if (ret > 0)
2559 ret = -ENOENT;
2560 if (ret < 0)
2561 goto out;
2563 eb = path->nodes[0];
2564 slot = path->slots[0];
2565 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2567 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2568 if (ret < 0)
2569 goto out;
2571 ret = get_cur_path(sctx, ino, gen, p);
2572 if (ret < 0)
2573 goto out;
2574 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2575 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
2576 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
2577 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
2578 /* TODO Add otime support when the otime patches get into upstream */
2580 ret = send_cmd(sctx);
2582 tlv_put_failure:
2583 out:
2584 fs_path_free(p);
2585 btrfs_free_path(path);
2586 return ret;
2590 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2591 * a valid path yet because we did not process the refs yet. So, the inode
2592 * is created as an orphan.
2594 static int send_create_inode(struct send_ctx *sctx, u64 ino)
2596 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
2597 int ret = 0;
2598 struct fs_path *p;
2599 int cmd;
2600 u64 gen;
2601 u64 mode;
2602 u64 rdev;
2604 btrfs_debug(fs_info, "send_create_inode %llu", ino);
2606 p = fs_path_alloc();
2607 if (!p)
2608 return -ENOMEM;
2610 if (ino != sctx->cur_ino) {
2611 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
2612 NULL, NULL, &rdev);
2613 if (ret < 0)
2614 goto out;
2615 } else {
2616 gen = sctx->cur_inode_gen;
2617 mode = sctx->cur_inode_mode;
2618 rdev = sctx->cur_inode_rdev;
2621 if (S_ISREG(mode)) {
2622 cmd = BTRFS_SEND_C_MKFILE;
2623 } else if (S_ISDIR(mode)) {
2624 cmd = BTRFS_SEND_C_MKDIR;
2625 } else if (S_ISLNK(mode)) {
2626 cmd = BTRFS_SEND_C_SYMLINK;
2627 } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2628 cmd = BTRFS_SEND_C_MKNOD;
2629 } else if (S_ISFIFO(mode)) {
2630 cmd = BTRFS_SEND_C_MKFIFO;
2631 } else if (S_ISSOCK(mode)) {
2632 cmd = BTRFS_SEND_C_MKSOCK;
2633 } else {
2634 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
2635 (int)(mode & S_IFMT));
2636 ret = -EOPNOTSUPP;
2637 goto out;
2640 ret = begin_cmd(sctx, cmd);
2641 if (ret < 0)
2642 goto out;
2644 ret = gen_unique_name(sctx, ino, gen, p);
2645 if (ret < 0)
2646 goto out;
2648 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2649 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2651 if (S_ISLNK(mode)) {
2652 fs_path_reset(p);
2653 ret = read_symlink(sctx->send_root, ino, p);
2654 if (ret < 0)
2655 goto out;
2656 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2657 } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2658 S_ISFIFO(mode) || S_ISSOCK(mode)) {
2659 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2660 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2663 ret = send_cmd(sctx);
2664 if (ret < 0)
2665 goto out;
2668 tlv_put_failure:
2669 out:
2670 fs_path_free(p);
2671 return ret;
2675 * We need some special handling for inodes that get processed before the parent
2676 * directory got created. See process_recorded_refs for details.
2677 * This function checks whether we already created the dir out of order.
2679 static int did_create_dir(struct send_ctx *sctx, u64 dir)
2681 int ret = 0;
2682 struct btrfs_path *path = NULL;
2683 struct btrfs_key key;
2684 struct btrfs_key found_key;
2685 struct btrfs_key di_key;
2686 struct extent_buffer *eb;
2687 struct btrfs_dir_item *di;
2688 int slot;
2690 path = alloc_path_for_send();
2691 if (!path) {
2692 ret = -ENOMEM;
2693 goto out;
2696 key.objectid = dir;
2697 key.type = BTRFS_DIR_INDEX_KEY;
2698 key.offset = 0;
2699 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2700 if (ret < 0)
2701 goto out;
2703 while (1) {
2704 eb = path->nodes[0];
2705 slot = path->slots[0];
2706 if (slot >= btrfs_header_nritems(eb)) {
2707 ret = btrfs_next_leaf(sctx->send_root, path);
2708 if (ret < 0) {
2709 goto out;
2710 } else if (ret > 0) {
2711 ret = 0;
2712 break;
2714 continue;
2717 btrfs_item_key_to_cpu(eb, &found_key, slot);
2718 if (found_key.objectid != key.objectid ||
2719 found_key.type != key.type) {
2720 ret = 0;
2721 goto out;
2724 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2725 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2727 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2728 di_key.objectid < sctx->send_progress) {
2729 ret = 1;
2730 goto out;
2733 path->slots[0]++;
2736 out:
2737 btrfs_free_path(path);
2738 return ret;
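/*
 * Example of the out-of-order case this handles (inode numbers made up):
 * if file inode 260 lives in directory inode 265, that directory does not
 * exist yet when inode 260 is processed, so process_recorded_refs creates
 * it early. When inode 265 is processed later, did_create_dir() finds a
 * dir item whose target inode number is below send_progress and returns 1,
 * so send_create_inode_if_needed() skips the mkdir.
 */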
2742 * Only creates the inode if it is:
2743 * 1. Not a directory
2744 * 2. Or a directory which was not already created due to out-of-order
2745 * directories. See did_create_dir and process_recorded_refs for details.
2747 static int send_create_inode_if_needed(struct send_ctx *sctx)
2749 int ret;
2751 if (S_ISDIR(sctx->cur_inode_mode)) {
2752 ret = did_create_dir(sctx, sctx->cur_ino);
2753 if (ret < 0)
2754 goto out;
2755 if (ret) {
2756 ret = 0;
2757 goto out;
2761 ret = send_create_inode(sctx, sctx->cur_ino);
2762 if (ret < 0)
2763 goto out;
2765 out:
2766 return ret;
2769 struct recorded_ref {
2770 struct list_head list;
2771 char *name;
2772 struct fs_path *full_path;
2773 u64 dir;
2774 u64 dir_gen;
2775 int name_len;
2778 static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
2780 ref->full_path = path;
2781 ref->name = (char *)kbasename(ref->full_path->start);
2782 ref->name_len = ref->full_path->end - ref->name;
2786 * We need to process new refs before deleted refs, but compare_tree gives us
2787 * everything mixed. So we first record all refs and later process them.
2788 * This function is a helper to record one ref.
2790 static int __record_ref(struct list_head *head, u64 dir,
2791 u64 dir_gen, struct fs_path *path)
2793 struct recorded_ref *ref;
2795 ref = kmalloc(sizeof(*ref), GFP_KERNEL);
2796 if (!ref)
2797 return -ENOMEM;
2799 ref->dir = dir;
2800 ref->dir_gen = dir_gen;
2801 set_ref_path(ref, path);
2802 list_add_tail(&ref->list, head);
2803 return 0;
2806 static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2808 struct recorded_ref *new;
2810 new = kmalloc(sizeof(*ref), GFP_KERNEL);
2811 if (!new)
2812 return -ENOMEM;
2814 new->dir = ref->dir;
2815 new->dir_gen = ref->dir_gen;
2816 new->full_path = NULL;
2817 INIT_LIST_HEAD(&new->list);
2818 list_add_tail(&new->list, list);
2819 return 0;
2822 static void __free_recorded_refs(struct list_head *head)
2824 struct recorded_ref *cur;
2826 while (!list_empty(head)) {
2827 cur = list_entry(head->next, struct recorded_ref, list);
2828 fs_path_free(cur->full_path);
2829 list_del(&cur->list);
2830 kfree(cur);
2834 static void free_recorded_refs(struct send_ctx *sctx)
2836 __free_recorded_refs(&sctx->new_refs);
2837 __free_recorded_refs(&sctx->deleted_refs);
2841 * Renames/moves a file/dir to its orphan name. Used when the first
2842 * ref of an unprocessed inode gets overwritten and for all non-empty
2843 * directories.
2845 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2846 struct fs_path *path)
2848 int ret;
2849 struct fs_path *orphan;
2851 orphan = fs_path_alloc();
2852 if (!orphan)
2853 return -ENOMEM;
2855 ret = gen_unique_name(sctx, ino, gen, orphan);
2856 if (ret < 0)
2857 goto out;
2859 ret = send_rename(sctx, path, orphan);
2861 out:
2862 fs_path_free(orphan);
2863 return ret;
2866 static struct orphan_dir_info *
2867 add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2869 struct rb_node **p = &sctx->orphan_dirs.rb_node;
2870 struct rb_node *parent = NULL;
2871 struct orphan_dir_info *entry, *odi;
2873 while (*p) {
2874 parent = *p;
2875 entry = rb_entry(parent, struct orphan_dir_info, node);
2876 if (dir_ino < entry->ino) {
2877 p = &(*p)->rb_left;
2878 } else if (dir_ino > entry->ino) {
2879 p = &(*p)->rb_right;
2880 } else {
2881 return entry;
2885 odi = kmalloc(sizeof(*odi), GFP_KERNEL);
2886 if (!odi)
2887 return ERR_PTR(-ENOMEM);
2888 odi->ino = dir_ino;
2889 odi->gen = 0;
2890 odi->last_dir_index_offset = 0;
2892 rb_link_node(&odi->node, parent, p);
2893 rb_insert_color(&odi->node, &sctx->orphan_dirs);
2894 return odi;
2897 static struct orphan_dir_info *
2898 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
2900 struct rb_node *n = sctx->orphan_dirs.rb_node;
2901 struct orphan_dir_info *entry;
2903 while (n) {
2904 entry = rb_entry(n, struct orphan_dir_info, node);
2905 if (dir_ino < entry->ino)
2906 n = n->rb_left;
2907 else if (dir_ino > entry->ino)
2908 n = n->rb_right;
2909 else
2910 return entry;
2912 return NULL;
2915 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
2917 struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
2919 return odi != NULL;
2922 static void free_orphan_dir_info(struct send_ctx *sctx,
2923 struct orphan_dir_info *odi)
2925 if (!odi)
2926 return;
2927 rb_erase(&odi->node, &sctx->orphan_dirs);
2928 kfree(odi);
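/*
 * Lifecycle sketch for the orphan_dirs tree above (the inode number and
 * error handling are illustrative only):
 *
 *      odi = add_orphan_dir_info(sctx, 257);   (dir 257 not deletable yet)
 *      odi->gen = dir_gen;
 *      ...
 *      is_waiting_for_rm(sctx, 257)            (true while entries pend)
 *      ...
 *      free_orphan_dir_info(sctx, odi);        (after the rmdir was sent)
 */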
2932 * Returns 1 if a directory can be removed at this point in time.
2933 * We check this by iterating all dir items and checking if the inode behind
2934 * the dir item was already processed.
2936 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
2937 u64 send_progress)
2939 int ret = 0;
2940 struct btrfs_root *root = sctx->parent_root;
2941 struct btrfs_path *path;
2942 struct btrfs_key key;
2943 struct btrfs_key found_key;
2944 struct btrfs_key loc;
2945 struct btrfs_dir_item *di;
2946 struct orphan_dir_info *odi = NULL;
2949 * Don't try to rmdir the top/root subvolume dir.
2951 if (dir == BTRFS_FIRST_FREE_OBJECTID)
2952 return 0;
2954 path = alloc_path_for_send();
2955 if (!path)
2956 return -ENOMEM;
2958 key.objectid = dir;
2959 key.type = BTRFS_DIR_INDEX_KEY;
2960 key.offset = 0;
2962 odi = get_orphan_dir_info(sctx, dir);
2963 if (odi)
2964 key.offset = odi->last_dir_index_offset;
2966 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2967 if (ret < 0)
2968 goto out;
2970 while (1) {
2971 struct waiting_dir_move *dm;
2973 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2974 ret = btrfs_next_leaf(root, path);
2975 if (ret < 0)
2976 goto out;
2977 else if (ret > 0)
2978 break;
2979 continue;
2981 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2982 path->slots[0]);
2983 if (found_key.objectid != key.objectid ||
2984 found_key.type != key.type)
2985 break;
2987 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2988 struct btrfs_dir_item);
2989 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2991 dm = get_waiting_dir_move(sctx, loc.objectid);
2992 if (dm) {
2993 odi = add_orphan_dir_info(sctx, dir);
2994 if (IS_ERR(odi)) {
2995 ret = PTR_ERR(odi);
2996 goto out;
2998 odi->gen = dir_gen;
2999 odi->last_dir_index_offset = found_key.offset;
3000 dm->rmdir_ino = dir;
3001 ret = 0;
3002 goto out;
3005 if (loc.objectid > send_progress) {
3006 odi = add_orphan_dir_info(sctx, dir);
3007 if (IS_ERR(odi)) {
3008 ret = PTR_ERR(odi);
3009 goto out;
3011 odi->gen = dir_gen;
3012 odi->last_dir_index_offset = found_key.offset;
3013 ret = 0;
3014 goto out;
3017 path->slots[0]++;
3019 free_orphan_dir_info(sctx, odi);
3021 ret = 1;
3023 out:
3024 btrfs_free_path(path);
3025 return ret;
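/*
 * Note that can_rmdir() is typically called more than once for the same
 * directory: odi->last_dir_index_offset remembers the dir index key at
 * which the previous scan gave up, so a later call resumes from there
 * instead of re-checking dir items that were already found deletable.
 */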
3028 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
3030 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
3032 return entry != NULL;
3035 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
3037 struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
3038 struct rb_node *parent = NULL;
3039 struct waiting_dir_move *entry, *dm;
3041 dm = kmalloc(sizeof(*dm), GFP_KERNEL);
3042 if (!dm)
3043 return -ENOMEM;
3044 dm->ino = ino;
3045 dm->rmdir_ino = 0;
3046 dm->orphanized = orphanized;
3048 while (*p) {
3049 parent = *p;
3050 entry = rb_entry(parent, struct waiting_dir_move, node);
3051 if (ino < entry->ino) {
3052 p = &(*p)->rb_left;
3053 } else if (ino > entry->ino) {
3054 p = &(*p)->rb_right;
3055 } else {
3056 kfree(dm);
3057 return -EEXIST;
3061 rb_link_node(&dm->node, parent, p);
3062 rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
3063 return 0;
3066 static struct waiting_dir_move *
3067 get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
3069 struct rb_node *n = sctx->waiting_dir_moves.rb_node;
3070 struct waiting_dir_move *entry;
3072 while (n) {
3073 entry = rb_entry(n, struct waiting_dir_move, node);
3074 if (ino < entry->ino)
3075 n = n->rb_left;
3076 else if (ino > entry->ino)
3077 n = n->rb_right;
3078 else
3079 return entry;
3081 return NULL;
3084 static void free_waiting_dir_move(struct send_ctx *sctx,
3085 struct waiting_dir_move *dm)
3087 if (!dm)
3088 return;
3089 rb_erase(&dm->node, &sctx->waiting_dir_moves);
3090 kfree(dm);
3093 static int add_pending_dir_move(struct send_ctx *sctx,
3094 u64 ino,
3095 u64 ino_gen,
3096 u64 parent_ino,
3097 struct list_head *new_refs,
3098 struct list_head *deleted_refs,
3099 const bool is_orphan)
3101 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
3102 struct rb_node *parent = NULL;
3103 struct pending_dir_move *entry = NULL, *pm;
3104 struct recorded_ref *cur;
3105 int exists = 0;
3106 int ret;
3108 pm = kmalloc(sizeof(*pm), GFP_KERNEL);
3109 if (!pm)
3110 return -ENOMEM;
3111 pm->parent_ino = parent_ino;
3112 pm->ino = ino;
3113 pm->gen = ino_gen;
3114 INIT_LIST_HEAD(&pm->list);
3115 INIT_LIST_HEAD(&pm->update_refs);
3116 RB_CLEAR_NODE(&pm->node);
3118 while (*p) {
3119 parent = *p;
3120 entry = rb_entry(parent, struct pending_dir_move, node);
3121 if (parent_ino < entry->parent_ino) {
3122 p = &(*p)->rb_left;
3123 } else if (parent_ino > entry->parent_ino) {
3124 p = &(*p)->rb_right;
3125 } else {
3126 exists = 1;
3127 break;
3131 list_for_each_entry(cur, deleted_refs, list) {
3132 ret = dup_ref(cur, &pm->update_refs);
3133 if (ret < 0)
3134 goto out;
3136 list_for_each_entry(cur, new_refs, list) {
3137 ret = dup_ref(cur, &pm->update_refs);
3138 if (ret < 0)
3139 goto out;
3142 ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
3143 if (ret)
3144 goto out;
3146 if (exists) {
3147 list_add_tail(&pm->list, &entry->list);
3148 } else {
3149 rb_link_node(&pm->node, parent, p);
3150 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
3152 ret = 0;
3153 out:
3154 if (ret) {
3155 __free_recorded_refs(&pm->update_refs);
3156 kfree(pm);
3158 return ret;
3161 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3162 u64 parent_ino)
3164 struct rb_node *n = sctx->pending_dir_moves.rb_node;
3165 struct pending_dir_move *entry;
3167 while (n) {
3168 entry = rb_entry(n, struct pending_dir_move, node);
3169 if (parent_ino < entry->parent_ino)
3170 n = n->rb_left;
3171 else if (parent_ino > entry->parent_ino)
3172 n = n->rb_right;
3173 else
3174 return entry;
3176 return NULL;
3179 static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3180 u64 ino, u64 gen, u64 *ancestor_ino)
3182 int ret = 0;
3183 u64 parent_inode = 0;
3184 u64 parent_gen = 0;
3185 u64 start_ino = ino;
3187 *ancestor_ino = 0;
3188 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3189 fs_path_reset(name);
3191 if (is_waiting_for_rm(sctx, ino))
3192 break;
3193 if (is_waiting_for_move(sctx, ino)) {
3194 if (*ancestor_ino == 0)
3195 *ancestor_ino = ino;
3196 ret = get_first_ref(sctx->parent_root, ino,
3197 &parent_inode, &parent_gen, name);
3198 } else {
3199 ret = __get_cur_name_and_parent(sctx, ino, gen,
3200 &parent_inode,
3201 &parent_gen, name);
3202 if (ret > 0) {
3203 ret = 0;
3204 break;
3207 if (ret < 0)
3208 break;
3209 if (parent_inode == start_ino) {
3210 ret = 1;
3211 if (*ancestor_ino == 0)
3212 *ancestor_ino = ino;
3213 break;
3215 ino = parent_inode;
3216 gen = parent_gen;
3218 return ret;
3221 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3223 struct fs_path *from_path = NULL;
3224 struct fs_path *to_path = NULL;
3225 struct fs_path *name = NULL;
3226 u64 orig_progress = sctx->send_progress;
3227 struct recorded_ref *cur;
3228 u64 parent_ino, parent_gen;
3229 struct waiting_dir_move *dm = NULL;
3230 u64 rmdir_ino = 0;
3231 u64 ancestor;
3232 bool is_orphan;
3233 int ret;
3235 name = fs_path_alloc();
3236 from_path = fs_path_alloc();
3237 if (!name || !from_path) {
3238 ret = -ENOMEM;
3239 goto out;
3242 dm = get_waiting_dir_move(sctx, pm->ino);
3243 ASSERT(dm);
3244 rmdir_ino = dm->rmdir_ino;
3245 is_orphan = dm->orphanized;
3246 free_waiting_dir_move(sctx, dm);
3248 if (is_orphan) {
3249 ret = gen_unique_name(sctx, pm->ino,
3250 pm->gen, from_path);
3251 } else {
3252 ret = get_first_ref(sctx->parent_root, pm->ino,
3253 &parent_ino, &parent_gen, name);
3254 if (ret < 0)
3255 goto out;
3256 ret = get_cur_path(sctx, parent_ino, parent_gen,
3257 from_path);
3258 if (ret < 0)
3259 goto out;
3260 ret = fs_path_add_path(from_path, name);
3262 if (ret < 0)
3263 goto out;
3265 sctx->send_progress = sctx->cur_ino + 1;
3266 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3267 if (ret < 0)
3268 goto out;
3269 if (ret) {
3270 LIST_HEAD(deleted_refs);
3271 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3272 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3273 &pm->update_refs, &deleted_refs,
3274 is_orphan);
3275 if (ret < 0)
3276 goto out;
3277 if (rmdir_ino) {
3278 dm = get_waiting_dir_move(sctx, pm->ino);
3279 ASSERT(dm);
3280 dm->rmdir_ino = rmdir_ino;
3282 goto out;
3284 fs_path_reset(name);
3285 to_path = name;
3286 name = NULL;
3287 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
3288 if (ret < 0)
3289 goto out;
3291 ret = send_rename(sctx, from_path, to_path);
3292 if (ret < 0)
3293 goto out;
3295 if (rmdir_ino) {
3296 struct orphan_dir_info *odi;
3297 u64 gen;
3299 odi = get_orphan_dir_info(sctx, rmdir_ino);
3300 if (!odi) {
3301 /* already deleted */
3302 goto finish;
3304 gen = odi->gen;
3306 ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
3307 if (ret < 0)
3308 goto out;
3309 if (!ret)
3310 goto finish;
3312 name = fs_path_alloc();
3313 if (!name) {
3314 ret = -ENOMEM;
3315 goto out;
3317 ret = get_cur_path(sctx, rmdir_ino, gen, name);
3318 if (ret < 0)
3319 goto out;
3320 ret = send_rmdir(sctx, name);
3321 if (ret < 0)
3322 goto out;
3325 finish:
3326 ret = send_utimes(sctx, pm->ino, pm->gen);
3327 if (ret < 0)
3328 goto out;
3331 * After rename/move, we need to update the utimes of both new parent(s)
3332 * and old parent(s).
3334 list_for_each_entry(cur, &pm->update_refs, list) {
3336 * The parent inode might have been deleted in the send snapshot
3338 ret = get_inode_info(sctx->send_root, cur->dir, NULL,
3339 NULL, NULL, NULL, NULL, NULL);
3340 if (ret == -ENOENT) {
3341 ret = 0;
3342 continue;
3344 if (ret < 0)
3345 goto out;
3347 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
3348 if (ret < 0)
3349 goto out;
3352 out:
3353 fs_path_free(name);
3354 fs_path_free(from_path);
3355 fs_path_free(to_path);
3356 sctx->send_progress = orig_progress;
3358 return ret;
3361 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
3363 if (!list_empty(&m->list))
3364 list_del(&m->list);
3365 if (!RB_EMPTY_NODE(&m->node))
3366 rb_erase(&m->node, &sctx->pending_dir_moves);
3367 __free_recorded_refs(&m->update_refs);
3368 kfree(m);
3371 static void tail_append_pending_moves(struct send_ctx *sctx,
3372 struct pending_dir_move *moves,
3373 struct list_head *stack)
3375 if (list_empty(&moves->list)) {
3376 list_add_tail(&moves->list, stack);
3377 } else {
3378 LIST_HEAD(list);
3379 list_splice_init(&moves->list, &list);
3380 list_add_tail(&moves->list, stack);
3381 list_splice_tail(&list, stack);
3383 if (!RB_EMPTY_NODE(&moves->node)) {
3384 rb_erase(&moves->node, &sctx->pending_dir_moves);
3385 RB_CLEAR_NODE(&moves->node);
3389 static int apply_children_dir_moves(struct send_ctx *sctx)
3391 struct pending_dir_move *pm;
3392 struct list_head stack;
3393 u64 parent_ino = sctx->cur_ino;
3394 int ret = 0;
3396 pm = get_pending_dir_moves(sctx, parent_ino);
3397 if (!pm)
3398 return 0;
3400 INIT_LIST_HEAD(&stack);
3401 tail_append_pending_moves(sctx, pm, &stack);
3403 while (!list_empty(&stack)) {
3404 pm = list_first_entry(&stack, struct pending_dir_move, list);
3405 parent_ino = pm->ino;
3406 ret = apply_dir_move(sctx, pm);
3407 free_pending_move(sctx, pm);
3408 if (ret)
3409 goto out;
3410 pm = get_pending_dir_moves(sctx, parent_ino);
3411 if (pm)
3412 tail_append_pending_moves(sctx, pm, &stack);
3414 return 0;
3416 out:
3417 while (!list_empty(&stack)) {
3418 pm = list_first_entry(&stack, struct pending_dir_move, list);
3419 free_pending_move(sctx, pm);
3421 return ret;
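/*
 * The stack above makes this a breadth-first walk over the pending moves.
 * Illustration with made-up inode numbers: if moves for inodes 280 and 281
 * were delayed on cur_ino 260, and a move for inode 290 was delayed on
 * 280, then apply_dir_move() is called in the order 280, 281, 290.
 */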
3425 * We might need to delay a directory rename even when no ancestor directory
3426 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3427 * renamed. This happens when we rename a directory to the old name (the name
3428 * in the parent root) of some other unrelated directory that got its rename
3429 * delayed due to some ancestor with a higher inode number that got renamed.
3431 * Example:
3433 * Parent snapshot:
3434 * . (ino 256)
3435 * |---- a/ (ino 257)
3436 * | |---- file (ino 260)
3438 * |---- b/ (ino 258)
3439 * |---- c/ (ino 259)
3441 * Send snapshot:
3442 * . (ino 256)
3443 * |---- a/ (ino 258)
3444 * |---- x/ (ino 259)
3445 * |---- y/ (ino 257)
3446 * |----- file (ino 260)
3448 * Here we cannot rename 258 from 'b' to 'a' without the rename of inode 257
3449 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3450 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3451 * must issue is:
3453 * 1 - rename 259 from 'c' to 'x'
3454 * 2 - rename 257 from 'a' to 'x/y'
3455 * 3 - rename 258 from 'b' to 'a'
3457 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3458 * be done right away and < 0 on error.
3460 static int wait_for_dest_dir_move(struct send_ctx *sctx,
3461 struct recorded_ref *parent_ref,
3462 const bool is_orphan)
3464 struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
3465 struct btrfs_path *path;
3466 struct btrfs_key key;
3467 struct btrfs_key di_key;
3468 struct btrfs_dir_item *di;
3469 u64 left_gen;
3470 u64 right_gen;
3471 int ret = 0;
3472 struct waiting_dir_move *wdm;
3474 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3475 return 0;
3477 path = alloc_path_for_send();
3478 if (!path)
3479 return -ENOMEM;
3481 key.objectid = parent_ref->dir;
3482 key.type = BTRFS_DIR_ITEM_KEY;
3483 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3485 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3486 if (ret < 0) {
3487 goto out;
3488 } else if (ret > 0) {
3489 ret = 0;
3490 goto out;
3493 di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
3494 parent_ref->name_len);
3495 if (!di) {
3496 ret = 0;
3497 goto out;
3500 * di_key.objectid has the number of the inode that has a dentry in the
3501 * parent directory with the same name that sctx->cur_ino is being
3502 * renamed to. We need to check if that inode is in the send root as
3503 * well and if it is currently marked as an inode with a pending rename,
3504 * if it is, we need to delay the rename of sctx->cur_ino as well, so
3505 * that it happens after that other inode is renamed.
3507 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3508 if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3509 ret = 0;
3510 goto out;
3513 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3514 &left_gen, NULL, NULL, NULL, NULL);
3515 if (ret < 0)
3516 goto out;
3517 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3518 &right_gen, NULL, NULL, NULL, NULL);
3519 if (ret < 0) {
3520 if (ret == -ENOENT)
3521 ret = 0;
3522 goto out;
3525 /* Different inode, no need to delay the rename of sctx->cur_ino */
3526 if (right_gen != left_gen) {
3527 ret = 0;
3528 goto out;
3531 wdm = get_waiting_dir_move(sctx, di_key.objectid);
3532 if (wdm && !wdm->orphanized) {
3533 ret = add_pending_dir_move(sctx,
3534 sctx->cur_ino,
3535 sctx->cur_inode_gen,
3536 di_key.objectid,
3537 &sctx->new_refs,
3538 &sctx->deleted_refs,
3539 is_orphan);
3540 if (!ret)
3541 ret = 1;
3543 out:
3544 btrfs_free_path(path);
3545 return ret;
3549 * Check if inode ino2, or any of its ancestors, is inode ino1.
3550 * Return 1 if true, 0 if false and < 0 on error.
3552 static int check_ino_in_path(struct btrfs_root *root,
3553 const u64 ino1,
3554 const u64 ino1_gen,
3555 const u64 ino2,
3556 const u64 ino2_gen,
3557 struct fs_path *fs_path)
3559 u64 ino = ino2;
3561 if (ino1 == ino2)
3562 return ino1_gen == ino2_gen;
3564 while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3565 u64 parent;
3566 u64 parent_gen;
3567 int ret;
3569 fs_path_reset(fs_path);
3570 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3571 if (ret < 0)
3572 return ret;
3573 if (parent == ino1)
3574 return parent_gen == ino1_gen;
3575 ino = parent;
3577 return 0;
3581 * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
3582 * possible path (in case ino2 is not a directory and has multiple hard links).
3583 * Return 1 if true, 0 if false and < 0 on error.
3585 static int is_ancestor(struct btrfs_root *root,
3586 const u64 ino1,
3587 const u64 ino1_gen,
3588 const u64 ino2,
3589 struct fs_path *fs_path)
3591 bool free_fs_path = false;
3592 int ret = 0;
3593 struct btrfs_path *path = NULL;
3594 struct btrfs_key key;
3596 if (!fs_path) {
3597 fs_path = fs_path_alloc();
3598 if (!fs_path)
3599 return -ENOMEM;
3600 free_fs_path = true;
3603 path = alloc_path_for_send();
3604 if (!path) {
3605 ret = -ENOMEM;
3606 goto out;
3609 key.objectid = ino2;
3610 key.type = BTRFS_INODE_REF_KEY;
3611 key.offset = 0;
3613 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3614 if (ret < 0)
3615 goto out;
3617 while (true) {
3618 struct extent_buffer *leaf = path->nodes[0];
3619 int slot = path->slots[0];
3620 u32 cur_offset = 0;
3621 u32 item_size;
3623 if (slot >= btrfs_header_nritems(leaf)) {
3624 ret = btrfs_next_leaf(root, path);
3625 if (ret < 0)
3626 goto out;
3627 if (ret > 0)
3628 break;
3629 continue;
3632 btrfs_item_key_to_cpu(leaf, &key, slot);
3633 if (key.objectid != ino2)
3634 break;
3635 if (key.type != BTRFS_INODE_REF_KEY &&
3636 key.type != BTRFS_INODE_EXTREF_KEY)
3637 break;
3639 item_size = btrfs_item_size_nr(leaf, slot);
3640 while (cur_offset < item_size) {
3641 u64 parent;
3642 u64 parent_gen;
3644 if (key.type == BTRFS_INODE_EXTREF_KEY) {
3645 unsigned long ptr;
3646 struct btrfs_inode_extref *extref;
3648 ptr = btrfs_item_ptr_offset(leaf, slot);
3649 extref = (struct btrfs_inode_extref *)
3650 (ptr + cur_offset);
3651 parent = btrfs_inode_extref_parent(leaf,
3652 extref);
3653 cur_offset += sizeof(*extref);
3654 cur_offset += btrfs_inode_extref_name_len(leaf,
3655 extref);
3656 } else {
3657 parent = key.offset;
3658 cur_offset = item_size;
3661 ret = get_inode_info(root, parent, NULL, &parent_gen,
3662 NULL, NULL, NULL, NULL);
3663 if (ret < 0)
3664 goto out;
3665 ret = check_ino_in_path(root, ino1, ino1_gen,
3666 parent, parent_gen, fs_path);
3667 if (ret)
3668 goto out;
3670 path->slots[0]++;
3672 ret = 0;
3673 out:
3674 btrfs_free_path(path);
3675 if (free_fs_path)
3676 fs_path_free(fs_path);
3677 return ret;
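/*
 * Example of why all refs of ino2 must be checked (made-up layout): if
 * ino2 is a regular file with hard links at a/x and b/y, then
 * is_ancestor() for the inode of directory b must return 1 even though
 * a first-ref walk starting from the a/x link never visits b.
 */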
3680 static int wait_for_parent_move(struct send_ctx *sctx,
3681 struct recorded_ref *parent_ref,
3682 const bool is_orphan)
3684 int ret = 0;
3685 u64 ino = parent_ref->dir;
3686 u64 ino_gen = parent_ref->dir_gen;
3687 u64 parent_ino_before, parent_ino_after;
3688 struct fs_path *path_before = NULL;
3689 struct fs_path *path_after = NULL;
3690 int len1, len2;
3692 path_after = fs_path_alloc();
3693 path_before = fs_path_alloc();
3694 if (!path_after || !path_before) {
3695 ret = -ENOMEM;
3696 goto out;
3700 * Our current directory inode may not yet be renamed/moved because some
3701 * ancestor (immediate or not) has to be renamed/moved first. So find if
3702 * such an ancestor exists and make sure our own rename/move happens after
3703 * that ancestor is processed to avoid path build infinite loops (done
3704 * at get_cur_path()).
3706 while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3707 u64 parent_ino_after_gen;
3709 if (is_waiting_for_move(sctx, ino)) {
3711 * If the current inode is an ancestor of ino in the
3712 * parent root, we need to delay the rename of the
3713 * current inode, otherwise don't delay the rename
3714 * because we can end up with a circular dependency
3715 * of renames, resulting in some directories never
3716 * getting the respective rename operations issued in
3717 * the send stream or getting into infinite path build
3718 * loops.
3720 ret = is_ancestor(sctx->parent_root,
3721 sctx->cur_ino, sctx->cur_inode_gen,
3722 ino, path_before);
3723 if (ret)
3724 break;
3727 fs_path_reset(path_before);
3728 fs_path_reset(path_after);
3730 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3731 &parent_ino_after_gen, path_after);
3732 if (ret < 0)
3733 goto out;
3734 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3735 NULL, path_before);
3736 if (ret < 0 && ret != -ENOENT) {
3737 goto out;
3738 } else if (ret == -ENOENT) {
3739 ret = 0;
3740 break;
3743 len1 = fs_path_len(path_before);
3744 len2 = fs_path_len(path_after);
3745 if (ino > sctx->cur_ino &&
3746 (parent_ino_before != parent_ino_after || len1 != len2 ||
3747 memcmp(path_before->start, path_after->start, len1))) {
3748 u64 parent_ino_gen;
3750 ret = get_inode_info(sctx->parent_root, ino, NULL,
3751 &parent_ino_gen, NULL, NULL, NULL,
3752 NULL);
3753 if (ret < 0)
3754 goto out;
3755 if (ino_gen == parent_ino_gen) {
3756 ret = 1;
3757 break;
3760 ino = parent_ino_after;
3761 ino_gen = parent_ino_after_gen;
3764 out:
3765 fs_path_free(path_before);
3766 fs_path_free(path_after);
3768 if (ret == 1) {
3769 ret = add_pending_dir_move(sctx,
3770 sctx->cur_ino,
3771 sctx->cur_inode_gen,
3772 ino,
3773 &sctx->new_refs,
3774 &sctx->deleted_refs,
3775 is_orphan);
3776 if (!ret)
3777 ret = 1;
3780 return ret;
3783 static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
3785 int ret;
3786 struct fs_path *new_path;
3789 * Our reference's name member points to its full_path member string, so
3790 * we use a new path here.
3792 new_path = fs_path_alloc();
3793 if (!new_path)
3794 return -ENOMEM;
3796 ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
3797 if (ret < 0) {
3798 fs_path_free(new_path);
3799 return ret;
3801 ret = fs_path_add(new_path, ref->name, ref->name_len);
3802 if (ret < 0) {
3803 fs_path_free(new_path);
3804 return ret;
3807 fs_path_free(ref->full_path);
3808 set_ref_path(ref, new_path);
3810 return 0;
3814 * This does all the move/link/unlink/rmdir magic.
3816 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3818 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
3819 int ret = 0;
3820 struct recorded_ref *cur;
3821 struct recorded_ref *cur2;
3822 struct list_head check_dirs;
3823 struct fs_path *valid_path = NULL;
3824 u64 ow_inode = 0;
3825 u64 ow_gen;
3826 u64 ow_mode;
3827 int did_overwrite = 0;
3828 int is_orphan = 0;
3829 u64 last_dir_ino_rm = 0;
3830 bool can_rename = true;
3831 bool orphanized_dir = false;
3832 bool orphanized_ancestor = false;
3834 btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
3837 * This should never happen as the root dir always has the same ref
3838 * which is always '..'
3840 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3841 INIT_LIST_HEAD(&check_dirs);
3843 valid_path = fs_path_alloc();
3844 if (!valid_path) {
3845 ret = -ENOMEM;
3846 goto out;
3850 * First, check if the first ref of the current inode was overwritten
3851 * before. If yes, we know that the current inode was already orphanized
3852 * and thus use the orphan name. If not, we can use get_cur_path to
3853 * get the path of the first ref as it would look like while receiving at
3854 * this point in time.
3855 * New inodes are always orphans at the beginning, so force the use of the
3856 * orphan name in this case.
3857 * The first ref is stored in valid_path and will be updated if it
3858 * gets moved around.
3860 if (!sctx->cur_inode_new) {
3861 ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3862 sctx->cur_inode_gen);
3863 if (ret < 0)
3864 goto out;
3865 if (ret)
3866 did_overwrite = 1;
3868 if (sctx->cur_inode_new || did_overwrite) {
3869 ret = gen_unique_name(sctx, sctx->cur_ino,
3870 sctx->cur_inode_gen, valid_path);
3871 if (ret < 0)
3872 goto out;
3873 is_orphan = 1;
3874 } else {
3875 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3876 valid_path);
3877 if (ret < 0)
3878 goto out;
3881 list_for_each_entry(cur, &sctx->new_refs, list) {
3883 * We may have refs where the parent directory does not exist
3884 * yet. This happens if the parent directory's inum is higher
3885 * than the current inum. To handle this case, we create the
3886 * parent directory out of order. But we need to check if this
3887 * already happened before due to other refs in the same dir.
3889 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3890 if (ret < 0)
3891 goto out;
3892 if (ret == inode_state_will_create) {
3893 ret = 0;
3895 * First check if any of the current inode's refs did
3896 * already create the dir.
3898 list_for_each_entry(cur2, &sctx->new_refs, list) {
3899 if (cur == cur2)
3900 break;
3901 if (cur2->dir == cur->dir) {
3902 ret = 1;
3903 break;
3908 * If that did not happen, check if a previous inode
3909 * did already create the dir.
3911 if (!ret)
3912 ret = did_create_dir(sctx, cur->dir);
3913 if (ret < 0)
3914 goto out;
3915 if (!ret) {
3916 ret = send_create_inode(sctx, cur->dir);
3917 if (ret < 0)
3918 goto out;
3923 * Check if this new ref would overwrite the first ref of
3924 * another unprocessed inode. If yes, orphanize the
3925 * overwritten inode. If we find an overwritten ref that is
3926 * not the first ref, simply unlink it.
3928 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
3929 cur->name, cur->name_len,
3930 &ow_inode, &ow_gen, &ow_mode);
3931 if (ret < 0)
3932 goto out;
3933 if (ret) {
3934 ret = is_first_ref(sctx->parent_root,
3935 ow_inode, cur->dir, cur->name,
3936 cur->name_len);
3937 if (ret < 0)
3938 goto out;
3939 if (ret) {
3940 struct name_cache_entry *nce;
3941 struct waiting_dir_move *wdm;
3943 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3944 cur->full_path);
3945 if (ret < 0)
3946 goto out;
3947 if (S_ISDIR(ow_mode))
3948 orphanized_dir = true;
3951 * If ow_inode has its rename operation delayed,
3952 * make sure that its orphanized name is used in
3953 * the source path when performing its rename
3954 * operation.
3956 if (is_waiting_for_move(sctx, ow_inode)) {
3957 wdm = get_waiting_dir_move(sctx,
3958 ow_inode);
3959 ASSERT(wdm);
3960 wdm->orphanized = true;
3964 * Make sure we clear our orphanized inode's
3965 * name from the name cache. This is because the
3966 * inode ow_inode might be an ancestor of some
3967 * other inode that will be orphanized as well
3968 * later and has an inode number greater than
3969 * sctx->send_progress. We need to prevent
3970 * future name lookups from using the old name
3971 * and return the orphan name instead.
3973 nce = name_cache_search(sctx, ow_inode, ow_gen);
3974 if (nce) {
3975 name_cache_delete(sctx, nce);
3976 kfree(nce);
3980 * ow_inode might currently be an ancestor of
3981 * cur_ino, therefore compute valid_path (the
3982 * current path of cur_ino) again because it
3983 * might contain the pre-orphanization name of
3984 * ow_inode, which is no longer valid.
3986 ret = is_ancestor(sctx->parent_root,
3987 ow_inode, ow_gen,
3988 sctx->cur_ino, NULL);
3989 if (ret > 0) {
3990 orphanized_ancestor = true;
3991 fs_path_reset(valid_path);
3992 ret = get_cur_path(sctx, sctx->cur_ino,
3993 sctx->cur_inode_gen,
3994 valid_path);
3996 if (ret < 0)
3997 goto out;
3998 } else {
3999 ret = send_unlink(sctx, cur->full_path);
4000 if (ret < 0)
4001 goto out;
4005 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
4006 ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
4007 if (ret < 0)
4008 goto out;
4009 if (ret == 1) {
4010 can_rename = false;
4011 *pending_move = 1;
4015 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
4016 can_rename) {
4017 ret = wait_for_parent_move(sctx, cur, is_orphan);
4018 if (ret < 0)
4019 goto out;
4020 if (ret == 1) {
4021 can_rename = false;
4022 *pending_move = 1;
4027 * link/move the ref to the new place. If we have an orphan
4028 * inode, move it and update valid_path. If not, link or move
4029 * it depending on the inode mode.
4031 if (is_orphan && can_rename) {
4032 ret = send_rename(sctx, valid_path, cur->full_path);
4033 if (ret < 0)
4034 goto out;
4035 is_orphan = 0;
4036 ret = fs_path_copy(valid_path, cur->full_path);
4037 if (ret < 0)
4038 goto out;
4039 } else if (can_rename) {
4040 if (S_ISDIR(sctx->cur_inode_mode)) {
4042 * Dirs can't be linked, so move it. For moved
4043 * dirs, we always have one new and one deleted
4044 * ref. The deleted ref is ignored later.
4046 ret = send_rename(sctx, valid_path,
4047 cur->full_path);
4048 if (!ret)
4049 ret = fs_path_copy(valid_path,
4050 cur->full_path);
4051 if (ret < 0)
4052 goto out;
4053 } else {
4055 * We might have previously orphanized an inode
4056 * which is an ancestor of our current inode,
4057 * so our reference's full path, which was
4058 * computed before any such orphanizations, must
4059 * be updated.
4061 if (orphanized_dir) {
4062 ret = update_ref_path(sctx, cur);
4063 if (ret < 0)
4064 goto out;
4066 ret = send_link(sctx, cur->full_path,
4067 valid_path);
4068 if (ret < 0)
4069 goto out;
4072 ret = dup_ref(cur, &check_dirs);
4073 if (ret < 0)
4074 goto out;
4077 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
4079 * Check if we can already rmdir the directory. If not,
4080 * orphanize it. For every dir item inside that gets deleted
4081 * later, we do this check again and rmdir it then if possible.
4082 * See the use of check_dirs for more details.
4084 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4085 sctx->cur_ino);
4086 if (ret < 0)
4087 goto out;
4088 if (ret) {
4089 ret = send_rmdir(sctx, valid_path);
4090 if (ret < 0)
4091 goto out;
4092 } else if (!is_orphan) {
4093 ret = orphanize_inode(sctx, sctx->cur_ino,
4094 sctx->cur_inode_gen, valid_path);
4095 if (ret < 0)
4096 goto out;
4097 is_orphan = 1;
4100 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4101 ret = dup_ref(cur, &check_dirs);
4102 if (ret < 0)
4103 goto out;
4105 } else if (S_ISDIR(sctx->cur_inode_mode) &&
4106 !list_empty(&sctx->deleted_refs)) {
4108 * We have a moved dir. Add the old parent to check_dirs
4110 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
4111 list);
4112 ret = dup_ref(cur, &check_dirs);
4113 if (ret < 0)
4114 goto out;
4115 } else if (!S_ISDIR(sctx->cur_inode_mode)) {
4117 * We have a non-dir inode. Go through all deleted refs and
4118 * unlink them if they were not already overwritten by other
4119 * inodes.
4121 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4122 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
4123 sctx->cur_ino, sctx->cur_inode_gen,
4124 cur->name, cur->name_len);
4125 if (ret < 0)
4126 goto out;
4127 if (!ret) {
4129 * If we orphanized any ancestor before, we need
4130 * to recompute the full path for deleted names,
4131 * since any such path was computed before we
4132 * processed any references and orphanized any
4133 * ancestor inode.
4135 if (orphanized_ancestor) {
4136 ret = update_ref_path(sctx, cur);
4137 if (ret < 0)
4138 goto out;
4140 ret = send_unlink(sctx, cur->full_path);
4141 if (ret < 0)
4142 goto out;
4144 ret = dup_ref(cur, &check_dirs);
4145 if (ret < 0)
4146 goto out;
4149 * If the inode is still orphan, unlink the orphan. This may
4150 * happen when a previous inode did overwrite the first ref
4151 * of this inode and no new refs were added for the current
4152 * inode. Unlinking does not mean that the inode is deleted in
4153 * all cases. There may still be links to this inode in other
4154 * places.
4156 if (is_orphan) {
4157 ret = send_unlink(sctx, valid_path);
4158 if (ret < 0)
4159 goto out;
4164 * We collected all parent dirs where cur_inode was once located. We
4165 * now go through all these dirs and check if they are pending for
4166 * deletion and if it's finally possible to perform the rmdir now.
4167 * We also update the inode stats of the parent dirs here.
4169 list_for_each_entry(cur, &check_dirs, list) {
4171 * In case we had refs into dirs that were not processed yet,
4172 * we don't need to do the utime and rmdir logic for these dirs.
4173 * The dir will be processed later.
4175 if (cur->dir > sctx->cur_ino)
4176 continue;
4178 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
4179 if (ret < 0)
4180 goto out;
4182 if (ret == inode_state_did_create ||
4183 ret == inode_state_no_change) {
4184 /* TODO delayed utimes */
4185 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
4186 if (ret < 0)
4187 goto out;
4188 } else if (ret == inode_state_did_delete &&
4189 cur->dir != last_dir_ino_rm) {
4190 ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
4191 sctx->cur_ino);
4192 if (ret < 0)
4193 goto out;
4194 if (ret) {
4195 ret = get_cur_path(sctx, cur->dir,
4196 cur->dir_gen, valid_path);
4197 if (ret < 0)
4198 goto out;
4199 ret = send_rmdir(sctx, valid_path);
4200 if (ret < 0)
4201 goto out;
4202 last_dir_ino_rm = cur->dir;
4207 ret = 0;
4209 out:
4210 __free_recorded_refs(&check_dirs);
4211 free_recorded_refs(sctx);
4212 fs_path_free(valid_path);
4213 return ret;
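/*
 * A compact example of the orphanization dance above (names and inode
 * numbers made up). Parent snapshot: directories a (ino 257) and b
 * (ino 258); in the send snapshot the two names are swapped. While
 * processing inode 257, its new ref "b" still belongs to the unprocessed
 * inode 258, so the emitted commands are roughly:
 *
 *      rename b -> o258-<gen>-0        (orphanize inode 258)
 *      rename a -> b                   (move inode 257 into place)
 *
 * and later, when inode 258 is processed:
 *
 *      rename o258-<gen>-0 -> a
 */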
4216 static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
4217 void *ctx, struct list_head *refs)
4219 int ret = 0;
4220 struct send_ctx *sctx = ctx;
4221 struct fs_path *p;
4222 u64 gen;
4224 p = fs_path_alloc();
4225 if (!p)
4226 return -ENOMEM;
4228 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
4229 NULL, NULL);
4230 if (ret < 0)
4231 goto out;
4233 ret = get_cur_path(sctx, dir, gen, p);
4234 if (ret < 0)
4235 goto out;
4236 ret = fs_path_add_path(p, name);
4237 if (ret < 0)
4238 goto out;
4240 ret = __record_ref(refs, dir, gen, p);
4242 out:
4243 if (ret)
4244 fs_path_free(p);
4245 return ret;
4248 static int __record_new_ref(int num, u64 dir, int index,
4249 struct fs_path *name,
4250 void *ctx)
4252 struct send_ctx *sctx = ctx;
4253 return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
4257 static int __record_deleted_ref(int num, u64 dir, int index,
4258 struct fs_path *name,
4259 void *ctx)
4261 struct send_ctx *sctx = ctx;
4262 return record_ref(sctx->parent_root, dir, name, ctx,
4263 &sctx->deleted_refs);
4266 static int record_new_ref(struct send_ctx *sctx)
4268 int ret;
4270 ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4271 sctx->cmp_key, 0, __record_new_ref, sctx);
4272 if (ret < 0)
4273 goto out;
4274 ret = 0;
4276 out:
4277 return ret;
4280 static int record_deleted_ref(struct send_ctx *sctx)
4282 int ret;
4284 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4285 sctx->cmp_key, 0, __record_deleted_ref, sctx);
4286 if (ret < 0)
4287 goto out;
4288 ret = 0;
4290 out:
4291 return ret;
4294 struct find_ref_ctx {
4295 u64 dir;
4296 u64 dir_gen;
4297 struct btrfs_root *root;
4298 struct fs_path *name;
4299 int found_idx;
4302 static int __find_iref(int num, u64 dir, int index,
4303 struct fs_path *name,
4304 void *ctx_)
4306 struct find_ref_ctx *ctx = ctx_;
4307 u64 dir_gen;
4308 int ret;
4310 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
4311 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
4313 * To avoid doing extra lookups we'll only do this if everything
4314 * else matches.
4316 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
4317 NULL, NULL, NULL);
4318 if (ret)
4319 return ret;
4320 if (dir_gen != ctx->dir_gen)
4321 return 0;
4322 ctx->found_idx = num;
4323 return 1;
4325 return 0;
4328 static int find_iref(struct btrfs_root *root,
4329 struct btrfs_path *path,
4330 struct btrfs_key *key,
4331 u64 dir, u64 dir_gen, struct fs_path *name)
4333 int ret;
4334 struct find_ref_ctx ctx;
4336 ctx.dir = dir;
4337 ctx.name = name;
4338 ctx.dir_gen = dir_gen;
4339 ctx.found_idx = -1;
4340 ctx.root = root;
4342 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
4343 if (ret < 0)
4344 return ret;
4346 if (ctx.found_idx == -1)
4347 return -ENOENT;
4349 return ctx.found_idx;
4352 static int __record_changed_new_ref(int num, u64 dir, int index,
4353 struct fs_path *name,
4354 void *ctx)
4356 u64 dir_gen;
4357 int ret;
4358 struct send_ctx *sctx = ctx;
4360 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
4361 NULL, NULL, NULL);
4362 if (ret)
4363 return ret;
4365 ret = find_iref(sctx->parent_root, sctx->right_path,
4366 sctx->cmp_key, dir, dir_gen, name);
4367 if (ret == -ENOENT)
4368 ret = __record_new_ref(num, dir, index, name, sctx);
4369 else if (ret > 0)
4370 ret = 0;
4372 return ret;
4375 static int __record_changed_deleted_ref(int num, u64 dir, int index,
4376 struct fs_path *name,
4377 void *ctx)
4379 u64 dir_gen;
4380 int ret;
4381 struct send_ctx *sctx = ctx;
4383 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
4384 NULL, NULL, NULL);
4385 if (ret)
4386 return ret;
4388 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
4389 dir, dir_gen, name);
4390 if (ret == -ENOENT)
4391 ret = __record_deleted_ref(num, dir, index, name, sctx);
4392 else if (ret > 0)
4393 ret = 0;
4395 return ret;
4398 static int record_changed_ref(struct send_ctx *sctx)
4400 int ret = 0;
4402 ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4403 sctx->cmp_key, 0, __record_changed_new_ref, sctx);
4404 if (ret < 0)
4405 goto out;
4406 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4407 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4408 if (ret < 0)
4409 goto out;
4410 ret = 0;
4412 out:
4413 return ret;
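/*
 * In other words, record_changed_ref() computes a set difference on the
 * refs of the current inode: names present in the send root but not in
 * the parent root are recorded as new, names present in the parent root
 * but not in the send root as deleted, and names found on both sides
 * (matching dir, dir_gen and name) are ignored.
 */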
4417 * Record and process all refs at once. Needed when an inode changes the
4418 * generation number, which means that it was deleted and recreated.
4420 static int process_all_refs(struct send_ctx *sctx,
4421 enum btrfs_compare_tree_result cmd)
4423 int ret;
4424 struct btrfs_root *root;
4425 struct btrfs_path *path;
4426 struct btrfs_key key;
4427 struct btrfs_key found_key;
4428 struct extent_buffer *eb;
4429 int slot;
4430 iterate_inode_ref_t cb;
4431 int pending_move = 0;
4433 path = alloc_path_for_send();
4434 if (!path)
4435 return -ENOMEM;
4437 if (cmd == BTRFS_COMPARE_TREE_NEW) {
4438 root = sctx->send_root;
4439 cb = __record_new_ref;
4440 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4441 root = sctx->parent_root;
4442 cb = __record_deleted_ref;
4443 } else {
4444 btrfs_err(sctx->send_root->fs_info,
4445 "Wrong command %d in process_all_refs", cmd);
4446 ret = -EINVAL;
4447 goto out;
4450 key.objectid = sctx->cmp_key->objectid;
4451 key.type = BTRFS_INODE_REF_KEY;
4452 key.offset = 0;
4453 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4454 if (ret < 0)
4455 goto out;
4457 while (1) {
4458 eb = path->nodes[0];
4459 slot = path->slots[0];
4460 if (slot >= btrfs_header_nritems(eb)) {
4461 ret = btrfs_next_leaf(root, path);
4462 if (ret < 0)
4463 goto out;
4464 else if (ret > 0)
4465 break;
4466 continue;
4469 btrfs_item_key_to_cpu(eb, &found_key, slot);
4471 if (found_key.objectid != key.objectid ||
4472 (found_key.type != BTRFS_INODE_REF_KEY &&
4473 found_key.type != BTRFS_INODE_EXTREF_KEY))
4474 break;
4476 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4477 if (ret < 0)
4478 goto out;
4480 path->slots[0]++;
4482 btrfs_release_path(path);
4485 * We don't actually care about pending_move as we are simply
4486 * re-creating this inode and will be renaming it into place once we
4487 * rename the parent directory.
4489 ret = process_recorded_refs(sctx, &pending_move);
4490 out:
4491 btrfs_free_path(path);
4492 return ret;
4495 static int send_set_xattr(struct send_ctx *sctx,
4496 struct fs_path *path,
4497 const char *name, int name_len,
4498 const char *data, int data_len)
4500 int ret = 0;
4502 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
4503 if (ret < 0)
4504 goto out;
4506 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4507 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4508 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
4510 ret = send_cmd(sctx);
4512 tlv_put_failure:
4513 out:
4514 return ret;
4517 static int send_remove_xattr(struct send_ctx *sctx,
4518 struct fs_path *path,
4519 const char *name, int name_len)
4521 int ret = 0;
4523 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
4524 if (ret < 0)
4525 goto out;
4527 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4528 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4530 ret = send_cmd(sctx);
4532 tlv_put_failure:
4533 out:
4534 return ret;
4537 static int __process_new_xattr(int num, struct btrfs_key *di_key,
4538 const char *name, int name_len,
4539 const char *data, int data_len,
4540 u8 type, void *ctx)
4542 int ret;
4543 struct send_ctx *sctx = ctx;
4544 struct fs_path *p;
4545 struct posix_acl_xattr_header dummy_acl;
4547 /* Capabilities are emitted by finish_inode_if_needed */
4548 if (!strncmp(name, XATTR_NAME_CAPS, name_len))
4549 return 0;
4551 p = fs_path_alloc();
4552 if (!p)
4553 return -ENOMEM;
4556 * This hack is needed because empty acls are stored as zero byte
4557 * data in xattrs. The problem is that receiving these zero byte
4558 * acls will fail later. To fix this, we send a dummy acl list that
4559 * only contains the version number and no entries.
4561 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4562 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4563 if (data_len == 0) {
4564 dummy_acl.a_version =
4565 cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4566 data = (char *)&dummy_acl;
4567 data_len = sizeof(dummy_acl);
4571 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4572 if (ret < 0)
4573 goto out;
4575 ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4577 out:
4578 fs_path_free(p);
4579 return ret;
4580 }
4582 static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4583 const char *name, int name_len,
4584 const char *data, int data_len,
4585 u8 type, void *ctx)
4587 int ret;
4588 struct send_ctx *sctx = ctx;
4589 struct fs_path *p;
4591 p = fs_path_alloc();
4592 if (!p)
4593 return -ENOMEM;
4595 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4596 if (ret < 0)
4597 goto out;
4599 ret = send_remove_xattr(sctx, p, name, name_len);
4601 out:
4602 fs_path_free(p);
4603 return ret;
4604 }
4606 static int process_new_xattr(struct send_ctx *sctx)
4608 int ret = 0;
4610 ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4611 __process_new_xattr, sctx);
4613 return ret;
4614 }
4616 static int process_deleted_xattr(struct send_ctx *sctx)
4618 return iterate_dir_item(sctx->parent_root, sctx->right_path,
4619 __process_deleted_xattr, sctx);
4620 }
4622 struct find_xattr_ctx {
4623 const char *name;
4624 int name_len;
4625 int found_idx;
4626 char *found_data;
4627 int found_data_len;
4628 };
4630 static int __find_xattr(int num, struct btrfs_key *di_key,
4631 const char *name, int name_len,
4632 const char *data, int data_len,
4633 u8 type, void *vctx)
4635 struct find_xattr_ctx *ctx = vctx;
4637 if (name_len == ctx->name_len &&
4638 strncmp(name, ctx->name, name_len) == 0) {
4639 ctx->found_idx = num;
4640 ctx->found_data_len = data_len;
4641 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
4642 if (!ctx->found_data)
4643 return -ENOMEM;
4644 return 1;
4646 return 0;
4647 }
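/*
 * Look up an xattr by name in the leaf that 'path' points at. Returns the
 * index of the matching dir item, -ENOENT if the name is not present, or a
 * negative error. When 'data' is non-NULL the caller owns the kmemdup'ed
 * buffer returned in it and must kfree() it.
 */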
4649 static int find_xattr(struct btrfs_root *root,
4650 struct btrfs_path *path,
4651 struct btrfs_key *key,
4652 const char *name, int name_len,
4653 char **data, int *data_len)
4655 int ret;
4656 struct find_xattr_ctx ctx;
4658 ctx.name = name;
4659 ctx.name_len = name_len;
4660 ctx.found_idx = -1;
4661 ctx.found_data = NULL;
4662 ctx.found_data_len = 0;
4664 ret = iterate_dir_item(root, path, __find_xattr, &ctx);
4665 if (ret < 0)
4666 return ret;
4668 if (ctx.found_idx == -1)
4669 return -ENOENT;
4670 if (data) {
4671 *data = ctx.found_data;
4672 *data_len = ctx.found_data_len;
4673 } else {
4674 kfree(ctx.found_data);
4675 }
4676 return ctx.found_idx;
4677 }
4680 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4681 const char *name, int name_len,
4682 const char *data, int data_len,
4683 u8 type, void *ctx)
4685 int ret;
4686 struct send_ctx *sctx = ctx;
4687 char *found_data = NULL;
4688 int found_data_len = 0;
4690 ret = find_xattr(sctx->parent_root, sctx->right_path,
4691 sctx->cmp_key, name, name_len, &found_data,
4692 &found_data_len);
4693 if (ret == -ENOENT) {
4694 ret = __process_new_xattr(num, di_key, name, name_len, data,
4695 data_len, type, ctx);
4696 } else if (ret >= 0) {
4697 if (data_len != found_data_len ||
4698 memcmp(data, found_data, data_len)) {
4699 ret = __process_new_xattr(num, di_key, name, name_len,
4700 data, data_len, type, ctx);
4701 } else {
4702 ret = 0;
4703 }
4704 }
4706 kfree(found_data);
4707 return ret;
4708 }
4710 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4711 const char *name, int name_len,
4712 const char *data, int data_len,
4713 u8 type, void *ctx)
4715 int ret;
4716 struct send_ctx *sctx = ctx;
4718 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4719 name, name_len, NULL, NULL);
4720 if (ret == -ENOENT)
4721 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4722 data_len, type, ctx);
4723 else if (ret >= 0)
4724 ret = 0;
4726 return ret;
4727 }
4729 static int process_changed_xattr(struct send_ctx *sctx)
4731 int ret = 0;
4733 ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4734 __process_changed_new_xattr, sctx);
4735 if (ret < 0)
4736 goto out;
4737 ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4738 __process_changed_deleted_xattr, sctx);
4740 out:
4741 return ret;
4742 }
4744 static int process_all_new_xattrs(struct send_ctx *sctx)
4746 int ret;
4747 struct btrfs_root *root;
4748 struct btrfs_path *path;
4749 struct btrfs_key key;
4750 struct btrfs_key found_key;
4751 struct extent_buffer *eb;
4752 int slot;
4754 path = alloc_path_for_send();
4755 if (!path)
4756 return -ENOMEM;
4758 root = sctx->send_root;
4760 key.objectid = sctx->cmp_key->objectid;
4761 key.type = BTRFS_XATTR_ITEM_KEY;
4762 key.offset = 0;
4763 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4764 if (ret < 0)
4765 goto out;
4767 while (1) {
4768 eb = path->nodes[0];
4769 slot = path->slots[0];
4770 if (slot >= btrfs_header_nritems(eb)) {
4771 ret = btrfs_next_leaf(root, path);
4772 if (ret < 0) {
4773 goto out;
4774 } else if (ret > 0) {
4775 ret = 0;
4776 break;
4777 }
4778 continue;
4779 }
4781 btrfs_item_key_to_cpu(eb, &found_key, slot);
4782 if (found_key.objectid != key.objectid ||
4783 found_key.type != key.type) {
4784 ret = 0;
4785 goto out;
4786 }
4788 ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
4789 if (ret < 0)
4790 goto out;
4792 path->slots[0]++;
4793 }
4795 out:
4796 btrfs_free_path(path);
4797 return ret;
4798 }
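/*
 * Copy up to 'len' bytes starting at file offset 'offset' of the current
 * inode from the page cache into sctx->read_buf, issuing readahead along
 * the way and clamping the length to i_size. Returns the number of bytes
 * copied or a negative error.
 */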
4800 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4802 struct btrfs_root *root = sctx->send_root;
4803 struct btrfs_fs_info *fs_info = root->fs_info;
4804 struct inode *inode;
4805 struct page *page;
4806 char *addr;
4807 struct btrfs_key key;
4808 pgoff_t index = offset >> PAGE_SHIFT;
4809 pgoff_t last_index;
4810 unsigned pg_offset = offset & ~PAGE_MASK;
4811 ssize_t ret = 0;
4813 key.objectid = sctx->cur_ino;
4814 key.type = BTRFS_INODE_ITEM_KEY;
4815 key.offset = 0;
4817 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4818 if (IS_ERR(inode))
4819 return PTR_ERR(inode);
4821 if (offset + len > i_size_read(inode)) {
4822 if (offset > i_size_read(inode))
4823 len = 0;
4824 else
4825 len = i_size_read(inode) - offset;
4826 }
4827 if (len == 0)
4828 goto out;
4830 last_index = (offset + len - 1) >> PAGE_SHIFT;
4832 /* initial readahead */
4833 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4834 file_ra_state_init(&sctx->ra, inode->i_mapping);
4836 while (index <= last_index) {
4837 unsigned cur_len = min_t(unsigned, len,
4838 PAGE_SIZE - pg_offset);
4840 page = find_lock_page(inode->i_mapping, index);
4841 if (!page) {
4842 page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
4843 NULL, index, last_index + 1 - index);
4845 page = find_or_create_page(inode->i_mapping, index,
4846 GFP_KERNEL);
4847 if (!page) {
4848 ret = -ENOMEM;
4849 break;
4850 }
4851 }
4853 if (PageReadahead(page)) {
4854 page_cache_async_readahead(inode->i_mapping, &sctx->ra,
4855 NULL, page, index, last_index + 1 - index);
4856 }
4858 if (!PageUptodate(page)) {
4859 btrfs_readpage(NULL, page);
4860 lock_page(page);
4861 if (!PageUptodate(page)) {
4862 unlock_page(page);
4863 put_page(page);
4864 ret = -EIO;
4865 break;
4866 }
4867 }
4869 addr = kmap(page);
4870 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4871 kunmap(page);
4872 unlock_page(page);
4873 put_page(page);
4874 index++;
4875 pg_offset = 0;
4876 len -= cur_len;
4877 ret += cur_len;
4878 }
4879 out:
4880 iput(inode);
4881 return ret;
4882 }
4884 /*
4885 * Read some bytes from the current inode/file and send a write command to
4886 * user space.
4887 */
4888 static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4890 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
4891 int ret = 0;
4892 struct fs_path *p;
4893 ssize_t num_read = 0;
4895 p = fs_path_alloc();
4896 if (!p)
4897 return -ENOMEM;
4899 btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
4901 num_read = fill_read_buf(sctx, offset, len);
4902 if (num_read <= 0) {
4903 if (num_read < 0)
4904 ret = num_read;
4905 goto out;
4906 }
4908 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4909 if (ret < 0)
4910 goto out;
4912 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4913 if (ret < 0)
4914 goto out;
4916 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4917 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4918 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4920 ret = send_cmd(sctx);
4922 tlv_put_failure:
4923 out:
4924 fs_path_free(p);
4925 if (ret < 0)
4926 return ret;
4927 return num_read;
4928 }
4930 /*
4931 * Send a clone command to user space.
4932 */
4933 static int send_clone(struct send_ctx *sctx,
4934 u64 offset, u32 len,
4935 struct clone_root *clone_root)
4937 int ret = 0;
4938 struct fs_path *p;
4939 u64 gen;
4941 btrfs_debug(sctx->send_root->fs_info,
4942 "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
4943 offset, len, clone_root->root->objectid, clone_root->ino,
4944 clone_root->offset);
4946 p = fs_path_alloc();
4947 if (!p)
4948 return -ENOMEM;
4950 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4951 if (ret < 0)
4952 goto out;
4954 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4955 if (ret < 0)
4956 goto out;
4958 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4959 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4960 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4962 if (clone_root->root == sctx->send_root) {
4963 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4964 &gen, NULL, NULL, NULL, NULL);
4965 if (ret < 0)
4966 goto out;
4967 ret = get_cur_path(sctx, clone_root->ino, gen, p);
4968 } else {
4969 ret = get_inode_path(clone_root->root, clone_root->ino, p);
4971 if (ret < 0)
4972 goto out;
4973 }
4974 /*
4975 * If the parent we're using has a received_uuid set then use that as
4976 * our clone source as that is what we will look for when doing a
4977 * receive.
4978 *
4979 * This covers the case that we create a snapshot off of a received
4980 * subvolume and then use that as the parent and try to receive on a
4981 * different host.
4982 */
4983 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
4984 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4985 clone_root->root->root_item.received_uuid);
4986 else
4987 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4988 clone_root->root->root_item.uuid);
4989 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4990 le64_to_cpu(clone_root->root->root_item.ctransid));
4991 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4992 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4993 clone_root->offset);
4995 ret = send_cmd(sctx);
4997 tlv_put_failure:
4998 out:
4999 fs_path_free(p);
5000 return ret;
5001 }
5003 /*
5004 * Send an update extent command to user space.
5005 */
5006 static int send_update_extent(struct send_ctx *sctx,
5007 u64 offset, u32 len)
5009 int ret = 0;
5010 struct fs_path *p;
5012 p = fs_path_alloc();
5013 if (!p)
5014 return -ENOMEM;
5016 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
5017 if (ret < 0)
5018 goto out;
5020 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
5021 if (ret < 0)
5022 goto out;
5024 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
5025 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
5026 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
5028 ret = send_cmd(sctx);
5030 tlv_put_failure:
5031 out:
5032 fs_path_free(p);
5033 return ret;
5034 }
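/*
 * Fill the range [sctx->cur_inode_last_extent, end) of the current file
 * with zeroes by emitting one or more write commands, or a single
 * UPDATE_EXTENT command when file data is omitted from the stream
 * (BTRFS_SEND_FLAG_NO_FILE_DATA).
 */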
5036 static int send_hole(struct send_ctx *sctx, u64 end)
5038 struct fs_path *p = NULL;
5039 u64 offset = sctx->cur_inode_last_extent;
5040 u64 len;
5041 int ret = 0;
5043 /*
5044 * A hole that starts at EOF or beyond it. Since we do not yet support
5045 * fallocate (for extent preallocation and hole punching), sending a
5046 * write of zeroes starting at EOF or beyond would later require issuing
5047 * a truncate operation which would undo the write and achieve nothing.
5048 */
5049 if (offset >= sctx->cur_inode_size)
5050 return 0;
5052 /*
5053 * Don't go beyond the inode's i_size due to prealloc extents that start
5054 * after the i_size.
5055 */
5056 end = min_t(u64, end, sctx->cur_inode_size);
5058 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5059 return send_update_extent(sctx, offset, end - offset);
5061 p = fs_path_alloc();
5062 if (!p)
5063 return -ENOMEM;
5064 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
5065 if (ret < 0)
5066 goto tlv_put_failure;
5067 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
5068 while (offset < end) {
5069 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
5071 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
5072 if (ret < 0)
5073 break;
5074 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
5075 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
5076 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
5077 ret = send_cmd(sctx);
5078 if (ret < 0)
5079 break;
5080 offset += len;
5081 }
5082 sctx->cur_inode_next_write_offset = offset;
5083 tlv_put_failure:
5084 fs_path_free(p);
5085 return ret;
5086 }
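/*
 * Send the data of the range [offset, offset + len) as a series of write
 * commands of at most BTRFS_SEND_READ_SIZE bytes each, or as a single
 * UPDATE_EXTENT command when file data is omitted from the stream.
 */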
5088 static int send_extent_data(struct send_ctx *sctx,
5089 const u64 offset,
5090 const u64 len)
5092 u64 sent = 0;
5094 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5095 return send_update_extent(sctx, offset, len);
5097 while (sent < len) {
5098 u64 size = len - sent;
5099 int ret;
5101 if (size > BTRFS_SEND_READ_SIZE)
5102 size = BTRFS_SEND_READ_SIZE;
5103 ret = send_write(sctx, offset + sent, size);
5104 if (ret < 0)
5105 return ret;
5106 if (!ret)
5107 break;
5108 sent += ret;
5110 return 0;
5111 }
5113 /*
5114 * Search for a capability xattr related to sctx->cur_ino. If the capability is
5115 * found, call the send_set_xattr function to emit it.
5116 *
5117 * Return 0 if there isn't a capability, or when the capability was emitted
5118 * successfully, or < 0 if an error occurred.
5119 */
5120 static int send_capabilities(struct send_ctx *sctx)
5122 struct fs_path *fspath = NULL;
5123 struct btrfs_path *path;
5124 struct btrfs_dir_item *di;
5125 struct extent_buffer *leaf;
5126 unsigned long data_ptr;
5127 char *buf = NULL;
5128 int buf_len;
5129 int ret = 0;
5131 path = alloc_path_for_send();
5132 if (!path)
5133 return -ENOMEM;
5135 di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
5136 XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
5137 if (!di) {
5138 /* There is no xattr for this inode */
5139 goto out;
5140 } else if (IS_ERR(di)) {
5141 ret = PTR_ERR(di);
5142 goto out;
5145 leaf = path->nodes[0];
5146 buf_len = btrfs_dir_data_len(leaf, di);
5148 fspath = fs_path_alloc();
5149 buf = kmalloc(buf_len, GFP_KERNEL);
5150 if (!fspath || !buf) {
5151 ret = -ENOMEM;
5152 goto out;
5155 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
5156 if (ret < 0)
5157 goto out;
5159 data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
5160 read_extent_buffer(leaf, buf, data_ptr, buf_len);
5162 ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
5163 strlen(XATTR_NAME_CAPS), buf, buf_len);
5164 out:
5165 kfree(buf);
5166 fs_path_free(fspath);
5167 btrfs_free_path(path);
5168 return ret;
5169 }
5171 static int clone_range(struct send_ctx *sctx,
5172 struct clone_root *clone_root,
5173 const u64 disk_byte,
5174 u64 data_offset,
5175 u64 offset,
5176 u64 len)
5178 struct btrfs_path *path;
5179 struct btrfs_key key;
5180 int ret;
5182 /*
5183 * Prevent cloning from a zero offset with a length matching the sector
5184 * size because in some scenarios this will make the receiver fail.
5185 *
5186 * For example, if in the source filesystem the extent at offset 0
5187 * has a length of sectorsize and it was written using direct IO, then
5188 * it can never be an inline extent (even if compression is enabled).
5189 * Then this extent can be cloned in the original filesystem to a non
5190 * zero file offset, but it may not be possible to clone in the
5191 * destination filesystem because it can be inlined due to compression
5192 * on the destination filesystem (as the receiver's write operations are
5193 * always done using buffered IO). The same happens when the original
5194 * filesystem does not have compression enabled but the destination
5195 * filesystem has.
5196 */
5197 if (clone_root->offset == 0 &&
5198 len == sctx->send_root->fs_info->sectorsize)
5199 return send_extent_data(sctx, offset, len);
5201 path = alloc_path_for_send();
5202 if (!path)
5203 return -ENOMEM;
5205 /*
5206 * We can't send a clone operation for the entire range if we find
5207 * extent items in the respective range in the source file that
5208 * refer to different extents or if we find holes.
5209 * So check for that and do a mix of clone and regular write/copy
5210 * operations if needed.
5211 *
5212 * Example:
5213 *
5214 * mkfs.btrfs -f /dev/sda
5215 * mount /dev/sda /mnt
5216 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
5217 * cp --reflink=always /mnt/foo /mnt/bar
5218 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
5219 * btrfs subvolume snapshot -r /mnt /mnt/snap
5220 *
5221 * If, when we send the snapshot and are processing file bar (which
5222 * has a higher inode number than foo), we blindly send a clone operation
5223 * for the [0, 100K[ range from foo to bar, the receiver ends up getting
5224 * a file bar that matches the content of file foo - iow, it doesn't match
5225 * the content from bar in the original filesystem.
5226 */
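/*
 * For the example above this means the stream for bar ends up being a mix
 * of commands: roughly a clone of the still shared [0, 50K[ range from foo
 * plus regular writes for the [50K, 100K[ range that foo no longer shares
 * with bar.
 */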
5227 key.objectid = clone_root->ino;
5228 key.type = BTRFS_EXTENT_DATA_KEY;
5229 key.offset = clone_root->offset;
5230 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
5231 if (ret < 0)
5232 goto out;
5233 if (ret > 0 && path->slots[0] > 0) {
5234 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
5235 if (key.objectid == clone_root->ino &&
5236 key.type == BTRFS_EXTENT_DATA_KEY)
5237 path->slots[0]--;
5238 }
5240 while (true) {
5241 struct extent_buffer *leaf = path->nodes[0];
5242 int slot = path->slots[0];
5243 struct btrfs_file_extent_item *ei;
5244 u8 type;
5245 u64 ext_len;
5246 u64 clone_len;
5248 if (slot >= btrfs_header_nritems(leaf)) {
5249 ret = btrfs_next_leaf(clone_root->root, path);
5250 if (ret < 0)
5251 goto out;
5252 else if (ret > 0)
5253 break;
5254 continue;
5255 }
5257 btrfs_item_key_to_cpu(leaf, &key, slot);
5259 /*
5260 * We might have an implicit trailing hole (NO_HOLES feature
5261 * enabled). We deal with it after leaving this loop.
5262 */
5263 if (key.objectid != clone_root->ino ||
5264 key.type != BTRFS_EXTENT_DATA_KEY)
5265 break;
5267 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5268 type = btrfs_file_extent_type(leaf, ei);
5269 if (type == BTRFS_FILE_EXTENT_INLINE) {
5270 ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
5271 ext_len = PAGE_ALIGN(ext_len);
5272 } else {
5273 ext_len = btrfs_file_extent_num_bytes(leaf, ei);
5274 }
5276 if (key.offset + ext_len <= clone_root->offset)
5277 goto next;
5279 if (key.offset > clone_root->offset) {
5280 /* Implicit hole, NO_HOLES feature enabled. */
5281 u64 hole_len = key.offset - clone_root->offset;
5283 if (hole_len > len)
5284 hole_len = len;
5285 ret = send_extent_data(sctx, offset, hole_len);
5286 if (ret < 0)
5287 goto out;
5289 len -= hole_len;
5290 if (len == 0)
5291 break;
5292 offset += hole_len;
5293 clone_root->offset += hole_len;
5294 data_offset += hole_len;
5295 }
5297 if (key.offset >= clone_root->offset + len)
5298 break;
5300 clone_len = min_t(u64, ext_len, len);
5302 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
5303 btrfs_file_extent_offset(leaf, ei) == data_offset)
5304 ret = send_clone(sctx, offset, clone_len, clone_root);
5305 else
5306 ret = send_extent_data(sctx, offset, clone_len);
5308 if (ret < 0)
5309 goto out;
5311 len -= clone_len;
5312 if (len == 0)
5313 break;
5314 offset += clone_len;
5315 clone_root->offset += clone_len;
5316 data_offset += clone_len;
5317 next:
5318 path->slots[0]++;
5319 }
5321 if (len > 0)
5322 ret = send_extent_data(sctx, offset, len);
5323 else
5324 ret = 0;
5325 out:
5326 btrfs_free_path(path);
5327 return ret;
5328 }
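/*
 * Decide how to transfer the file extent that 'key'/'path' point at: clone
 * it from 'clone_root' when a clone source was found and the range is
 * block aligned, otherwise fall back to plain write commands.
 */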
5330 static int send_write_or_clone(struct send_ctx *sctx,
5331 struct btrfs_path *path,
5332 struct btrfs_key *key,
5333 struct clone_root *clone_root)
5335 int ret = 0;
5336 struct btrfs_file_extent_item *ei;
5337 u64 offset = key->offset;
5338 u64 len;
5339 u8 type;
5340 u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
5342 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
5343 struct btrfs_file_extent_item);
5344 type = btrfs_file_extent_type(path->nodes[0], ei);
5345 if (type == BTRFS_FILE_EXTENT_INLINE) {
5346 len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
5347 /*
5348 * it is possible the inline item won't cover the whole page,
5349 * but there may be items after this page. Make
5350 * sure to send the whole thing
5351 */
5352 len = PAGE_ALIGN(len);
5353 } else {
5354 len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
5357 if (offset >= sctx->cur_inode_size) {
5358 ret = 0;
5359 goto out;
5361 if (offset + len > sctx->cur_inode_size)
5362 len = sctx->cur_inode_size - offset;
5363 if (len == 0) {
5364 ret = 0;
5365 goto out;
5366 }
5368 if (clone_root && IS_ALIGNED(offset + len, bs)) {
5369 u64 disk_byte;
5370 u64 data_offset;
5372 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
5373 data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
5374 ret = clone_range(sctx, clone_root, disk_byte, data_offset,
5375 offset, len);
5376 } else {
5377 ret = send_extent_data(sctx, offset, len);
5378 }
5379 sctx->cur_inode_next_write_offset = offset + len;
5380 out:
5381 return ret;
5382 }
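/*
 * Check if the extent item at 'left_path'/'ekey' in the send snapshot
 * covers a file range whose data is already fully present at the same
 * range in the parent snapshot. Returns 1 if the range is unchanged
 * (nothing to send), 0 if it changed, or a negative error.
 */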
5384 static int is_extent_unchanged(struct send_ctx *sctx,
5385 struct btrfs_path *left_path,
5386 struct btrfs_key *ekey)
5388 int ret = 0;
5389 struct btrfs_key key;
5390 struct btrfs_path *path = NULL;
5391 struct extent_buffer *eb;
5392 int slot;
5393 struct btrfs_key found_key;
5394 struct btrfs_file_extent_item *ei;
5395 u64 left_disknr;
5396 u64 right_disknr;
5397 u64 left_offset;
5398 u64 right_offset;
5399 u64 left_offset_fixed;
5400 u64 left_len;
5401 u64 right_len;
5402 u64 left_gen;
5403 u64 right_gen;
5404 u8 left_type;
5405 u8 right_type;
5407 path = alloc_path_for_send();
5408 if (!path)
5409 return -ENOMEM;
5411 eb = left_path->nodes[0];
5412 slot = left_path->slots[0];
5413 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
5414 left_type = btrfs_file_extent_type(eb, ei);
5416 if (left_type != BTRFS_FILE_EXTENT_REG) {
5417 ret = 0;
5418 goto out;
5419 }
5420 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5421 left_len = btrfs_file_extent_num_bytes(eb, ei);
5422 left_offset = btrfs_file_extent_offset(eb, ei);
5423 left_gen = btrfs_file_extent_generation(eb, ei);
5425 /*
5426 * Following comments will refer to these graphics. L is the left
5427 * extents which we are checking at the moment. 1-8 are the right
5428 * extents that we iterate.
5429 *
5430 * |-----L-----|
5431 * |-1-|-2a-|-3-|-4-|-5-|-6-|
5432 *
5433 * |-----L-----|
5434 * |--1--|-2b-|...(same as above)
5435 *
5436 * Alternative situation. Happens on files where extents got split.
5437 * |-----L-----|
5438 * |-----------7-----------|-6-|
5439 *
5440 * Alternative situation. Happens on files which got larger.
5441 * |-----L-----|
5442 * |-8-|
5443 * Nothing follows after 8.
5444 */
5446 key.objectid = ekey->objectid;
5447 key.type = BTRFS_EXTENT_DATA_KEY;
5448 key.offset = ekey->offset;
5449 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
5450 if (ret < 0)
5451 goto out;
5452 if (ret) {
5453 ret = 0;
5454 goto out;
5455 }
5457 /*
5458 * Handle special case where the right side has no extents at all.
5459 */
5460 eb = path->nodes[0];
5461 slot = path->slots[0];
5462 btrfs_item_key_to_cpu(eb, &found_key, slot);
5463 if (found_key.objectid != key.objectid ||
5464 found_key.type != key.type) {
5465 /* If we're a hole then just pretend nothing changed */
5466 ret = (left_disknr) ? 0 : 1;
5467 goto out;
5468 }
5470 /*
5471 * We're now on 2a, 2b or 7.
5472 */
5473 key = found_key;
5474 while (key.offset < ekey->offset + left_len) {
5475 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
5476 right_type = btrfs_file_extent_type(eb, ei);
5477 if (right_type != BTRFS_FILE_EXTENT_REG &&
5478 right_type != BTRFS_FILE_EXTENT_INLINE) {
5479 ret = 0;
5480 goto out;
5481 }
5483 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5484 right_len = btrfs_file_extent_ram_bytes(eb, ei);
5485 right_len = PAGE_ALIGN(right_len);
5486 } else {
5487 right_len = btrfs_file_extent_num_bytes(eb, ei);
5488 }
5490 /*
5491 * Are we at extent 8? If yes, we know the extent is changed.
5492 * This may only happen on the first iteration.
5493 */
5494 if (found_key.offset + right_len <= ekey->offset) {
5495 /* If we're a hole just pretend nothing changed */
5496 ret = (left_disknr) ? 0 : 1;
5497 goto out;
5498 }
5500 /*
5501 * We just wanted to see, when we have an inline extent, whether
5502 * what follows it is a regular extent (we wanted to check the above
5503 * condition for inline extents too). This should normally not
5504 * happen but it's possible for example when we have an inline
5505 * compressed extent representing data with a size matching
5506 * the page size (currently the same as sector size).
5507 */
5508 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5509 ret = 0;
5510 goto out;
5511 }
5513 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5514 right_offset = btrfs_file_extent_offset(eb, ei);
5515 right_gen = btrfs_file_extent_generation(eb, ei);
5517 left_offset_fixed = left_offset;
5518 if (key.offset < ekey->offset) {
5519 /* Fix the right offset for 2a and 7. */
5520 right_offset += ekey->offset - key.offset;
5521 } else {
5522 /* Fix the left offset for all behind 2a and 2b */
5523 left_offset_fixed += key.offset - ekey->offset;
5524 }
5526 /*
5527 * Check if we have the same extent.
5528 */
5529 if (left_disknr != right_disknr ||
5530 left_offset_fixed != right_offset ||
5531 left_gen != right_gen) {
5532 ret = 0;
5533 goto out;
5534 }
5536 /*
5537 * Go to the next extent.
5538 */
5539 ret = btrfs_next_item(sctx->parent_root, path);
5540 if (ret < 0)
5541 goto out;
5542 if (!ret) {
5543 eb = path->nodes[0];
5544 slot = path->slots[0];
5545 btrfs_item_key_to_cpu(eb, &found_key, slot);
5546 }
5547 if (ret || found_key.objectid != key.objectid ||
5548 found_key.type != key.type) {
5549 key.offset += right_len;
5550 break;
5551 }
5552 if (found_key.offset != key.offset + right_len) {
5553 ret = 0;
5554 goto out;
5555 }
5556 key = found_key;
5557 }
5559 /*
5560 * We're now behind the left extent (treat as unchanged) or at the end
5561 * of the right side (treat as changed).
5562 */
5563 if (key.offset >= ekey->offset + left_len)
5564 ret = 1;
5565 else
5566 ret = 0;
5569 out:
5570 btrfs_free_path(path);
5571 return ret;
5572 }
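/*
 * Record in sctx->cur_inode_last_extent where the extent covering file
 * offset 'offset' of the current inode ends, so that hole detection knows
 * where the previously processed extent finished.
 */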
5574 static int get_last_extent(struct send_ctx *sctx, u64 offset)
5576 struct btrfs_path *path;
5577 struct btrfs_root *root = sctx->send_root;
5578 struct btrfs_file_extent_item *fi;
5579 struct btrfs_key key;
5580 u64 extent_end;
5581 u8 type;
5582 int ret;
5584 path = alloc_path_for_send();
5585 if (!path)
5586 return -ENOMEM;
5588 sctx->cur_inode_last_extent = 0;
5590 key.objectid = sctx->cur_ino;
5591 key.type = BTRFS_EXTENT_DATA_KEY;
5592 key.offset = offset;
5593 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
5594 if (ret < 0)
5595 goto out;
5596 ret = 0;
5597 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5598 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
5599 goto out;
5601 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5602 struct btrfs_file_extent_item);
5603 type = btrfs_file_extent_type(path->nodes[0], fi);
5604 if (type == BTRFS_FILE_EXTENT_INLINE) {
5605 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5606 extent_end = ALIGN(key.offset + size,
5607 sctx->send_root->fs_info->sectorsize);
5608 } else {
5609 extent_end = key.offset +
5610 btrfs_file_extent_num_bytes(path->nodes[0], fi);
5611 }
5612 sctx->cur_inode_last_extent = extent_end;
5613 out:
5614 btrfs_free_path(path);
5615 return ret;
5616 }
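/*
 * Check whether the file range [start, end) of the current inode is a hole
 * in the parent snapshot. Returns 1 if it is (only holes or no extents at
 * all), 0 if some extent with real data overlaps the range, or a negative
 * error.
 */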
5618 static int range_is_hole_in_parent(struct send_ctx *sctx,
5619 const u64 start,
5620 const u64 end)
5622 struct btrfs_path *path;
5623 struct btrfs_key key;
5624 struct btrfs_root *root = sctx->parent_root;
5625 u64 search_start = start;
5626 int ret;
5628 path = alloc_path_for_send();
5629 if (!path)
5630 return -ENOMEM;
5632 key.objectid = sctx->cur_ino;
5633 key.type = BTRFS_EXTENT_DATA_KEY;
5634 key.offset = search_start;
5635 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5636 if (ret < 0)
5637 goto out;
5638 if (ret > 0 && path->slots[0] > 0)
5639 path->slots[0]--;
5641 while (search_start < end) {
5642 struct extent_buffer *leaf = path->nodes[0];
5643 int slot = path->slots[0];
5644 struct btrfs_file_extent_item *fi;
5645 u64 extent_end;
5647 if (slot >= btrfs_header_nritems(leaf)) {
5648 ret = btrfs_next_leaf(root, path);
5649 if (ret < 0)
5650 goto out;
5651 else if (ret > 0)
5652 break;
5653 continue;
5654 }
5656 btrfs_item_key_to_cpu(leaf, &key, slot);
5657 if (key.objectid < sctx->cur_ino ||
5658 key.type < BTRFS_EXTENT_DATA_KEY)
5659 goto next;
5660 if (key.objectid > sctx->cur_ino ||
5661 key.type > BTRFS_EXTENT_DATA_KEY ||
5662 key.offset >= end)
5663 break;
5665 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5666 if (btrfs_file_extent_type(leaf, fi) ==
5667 BTRFS_FILE_EXTENT_INLINE) {
5668 u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
5670 extent_end = ALIGN(key.offset + size,
5671 root->fs_info->sectorsize);
5672 } else {
5673 extent_end = key.offset +
5674 btrfs_file_extent_num_bytes(leaf, fi);
5675 }
5676 if (extent_end <= start)
5677 goto next;
5678 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
5679 search_start = extent_end;
5680 goto next;
5682 ret = 0;
5683 goto out;
5684 next:
5685 path->slots[0]++;
5686 }
5687 ret = 1;
5688 out:
5689 btrfs_free_path(path);
5690 return ret;
5691 }
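/*
 * If hole detection is enabled for this stream, fill (with writes of
 * zeroes) any gap between the end of the last processed extent and the
 * start of the extent item that 'key' points at, then remember the new
 * extent end.
 */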
5693 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5694 struct btrfs_key *key)
5696 struct btrfs_file_extent_item *fi;
5697 u64 extent_end;
5698 u8 type;
5699 int ret = 0;
5701 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
5702 return 0;
5704 if (sctx->cur_inode_last_extent == (u64)-1) {
5705 ret = get_last_extent(sctx, key->offset - 1);
5706 if (ret)
5707 return ret;
5708 }
5710 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5711 struct btrfs_file_extent_item);
5712 type = btrfs_file_extent_type(path->nodes[0], fi);
5713 if (type == BTRFS_FILE_EXTENT_INLINE) {
5714 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5715 extent_end = ALIGN(key->offset + size,
5716 sctx->send_root->fs_info->sectorsize);
5717 } else {
5718 extent_end = key->offset +
5719 btrfs_file_extent_num_bytes(path->nodes[0], fi);
5720 }
5722 if (path->slots[0] == 0 &&
5723 sctx->cur_inode_last_extent < key->offset) {
5724 /*
5725 * We might have skipped entire leafs that contained only
5726 * file extent items for our current inode. These leafs have
5727 * a generation number smaller (older) than the one in the
5728 * current leaf and the leaf our last extent came from, and
5729 * are located between these 2 leafs.
5730 */
5731 ret = get_last_extent(sctx, key->offset - 1);
5732 if (ret)
5733 return ret;
5734 }
5736 if (sctx->cur_inode_last_extent < key->offset) {
5737 ret = range_is_hole_in_parent(sctx,
5738 sctx->cur_inode_last_extent,
5739 key->offset);
5740 if (ret < 0)
5741 return ret;
5742 else if (ret == 0)
5743 ret = send_hole(sctx, key->offset);
5744 else
5745 ret = 0;
5746 }
5747 sctx->cur_inode_last_extent = extent_end;
5748 return ret;
5749 }
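/*
 * Handle one file extent item of the current inode: skip it if it is
 * unchanged relative to the parent snapshot, otherwise look for a clone
 * source and send the data as clone and/or write commands, emitting holes
 * where needed.
 */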
5751 static int process_extent(struct send_ctx *sctx,
5752 struct btrfs_path *path,
5753 struct btrfs_key *key)
5755 struct clone_root *found_clone = NULL;
5756 int ret = 0;
5758 if (S_ISLNK(sctx->cur_inode_mode))
5759 return 0;
5761 if (sctx->parent_root && !sctx->cur_inode_new) {
5762 ret = is_extent_unchanged(sctx, path, key);
5763 if (ret < 0)
5764 goto out;
5765 if (ret) {
5766 ret = 0;
5767 goto out_hole;
5769 } else {
5770 struct btrfs_file_extent_item *ei;
5771 u8 type;
5773 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
5774 struct btrfs_file_extent_item);
5775 type = btrfs_file_extent_type(path->nodes[0], ei);
5776 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
5777 type == BTRFS_FILE_EXTENT_REG) {
5778 /*
5779 * The send spec does not have a prealloc command yet,
5780 * so just leave a hole for prealloc'ed extents until
5781 * we have enough commands queued up to justify rev'ing
5782 * the send spec.
5783 */
5784 if (type == BTRFS_FILE_EXTENT_PREALLOC) {
5785 ret = 0;
5786 goto out;
5787 }
5789 /* Have a hole, just skip it. */
5790 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
5791 ret = 0;
5792 goto out;
5793 }
5794 }
5795 }
5797 ret = find_extent_clone(sctx, path, key->objectid, key->offset,
5798 sctx->cur_inode_size, &found_clone);
5799 if (ret != -ENOENT && ret < 0)
5800 goto out;
5802 ret = send_write_or_clone(sctx, path, key, found_clone);
5803 if (ret)
5804 goto out;
5805 out_hole:
5806 ret = maybe_send_hole(sctx, path, key);
5807 out:
5808 return ret;
5809 }
5811 static int process_all_extents(struct send_ctx *sctx)
5813 int ret;
5814 struct btrfs_root *root;
5815 struct btrfs_path *path;
5816 struct btrfs_key key;
5817 struct btrfs_key found_key;
5818 struct extent_buffer *eb;
5819 int slot;
5821 root = sctx->send_root;
5822 path = alloc_path_for_send();
5823 if (!path)
5824 return -ENOMEM;
5826 key.objectid = sctx->cmp_key->objectid;
5827 key.type = BTRFS_EXTENT_DATA_KEY;
5828 key.offset = 0;
5829 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5830 if (ret < 0)
5831 goto out;
5833 while (1) {
5834 eb = path->nodes[0];
5835 slot = path->slots[0];
5837 if (slot >= btrfs_header_nritems(eb)) {
5838 ret = btrfs_next_leaf(root, path);
5839 if (ret < 0) {
5840 goto out;
5841 } else if (ret > 0) {
5842 ret = 0;
5843 break;
5844 }
5845 continue;
5846 }
5848 btrfs_item_key_to_cpu(eb, &found_key, slot);
5850 if (found_key.objectid != key.objectid ||
5851 found_key.type != key.type) {
5852 ret = 0;
5853 goto out;
5854 }
5856 ret = process_extent(sctx, path, &found_key);
5857 if (ret < 0)
5858 goto out;
5860 path->slots[0]++;
5861 }
5863 out:
5864 btrfs_free_path(path);
5865 return ret;
5866 }
5868 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5869 int *pending_move,
5870 int *refs_processed)
5872 int ret = 0;
5874 if (sctx->cur_ino == 0)
5875 goto out;
5876 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5877 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5878 goto out;
5879 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5880 goto out;
5882 ret = process_recorded_refs(sctx, pending_move);
5883 if (ret < 0)
5884 goto out;
5886 *refs_processed = 1;
5887 out:
5888 return ret;
5889 }
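/*
 * Emit everything that must follow the data of an inode: trailing hole,
 * truncate, chown/chmod, capabilities, pending directory moves and the
 * final utimes update.
 */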
5891 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
5893 int ret = 0;
5894 u64 left_mode;
5895 u64 left_uid;
5896 u64 left_gid;
5897 u64 right_mode;
5898 u64 right_uid;
5899 u64 right_gid;
5900 int need_chmod = 0;
5901 int need_chown = 0;
5902 int need_truncate = 1;
5903 int pending_move = 0;
5904 int refs_processed = 0;
5906 if (sctx->ignore_cur_inode)
5907 return 0;
5909 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
5910 &refs_processed);
5911 if (ret < 0)
5912 goto out;
5914 /*
5915 * We have processed the refs and thus need to advance send_progress.
5916 * Now, calls to get_cur_xxx will take the updated refs of the current
5917 * inode into account.
5918 *
5919 * On the other hand, if our current inode is a directory and couldn't
5920 * be moved/renamed because its parent was renamed/moved too and it has
5921 * a higher inode number, we can only move/rename our current inode
5922 * after we moved/renamed its parent. Therefore in this case operate on
5923 * the old path (pre move/rename) of our current inode, and the
5924 * move/rename will be performed later.
5925 */
5926 if (refs_processed && !pending_move)
5927 sctx->send_progress = sctx->cur_ino + 1;
5929 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
5930 goto out;
5931 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
5932 goto out;
5934 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
5935 &left_mode, &left_uid, &left_gid, NULL);
5936 if (ret < 0)
5937 goto out;
5939 if (!sctx->parent_root || sctx->cur_inode_new) {
5940 need_chown = 1;
5941 if (!S_ISLNK(sctx->cur_inode_mode))
5942 need_chmod = 1;
5943 if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
5944 need_truncate = 0;
5945 } else {
5946 u64 old_size;
5948 ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
5949 &old_size, NULL, &right_mode, &right_uid,
5950 &right_gid, NULL);
5951 if (ret < 0)
5952 goto out;
5954 if (left_uid != right_uid || left_gid != right_gid)
5955 need_chown = 1;
5956 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
5957 need_chmod = 1;
5958 if ((old_size == sctx->cur_inode_size) ||
5959 (sctx->cur_inode_size > old_size &&
5960 sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
5961 need_truncate = 0;
5962 }
5964 if (S_ISREG(sctx->cur_inode_mode)) {
5965 if (need_send_hole(sctx)) {
5966 if (sctx->cur_inode_last_extent == (u64)-1 ||
5967 sctx->cur_inode_last_extent <
5968 sctx->cur_inode_size) {
5969 ret = get_last_extent(sctx, (u64)-1);
5970 if (ret)
5971 goto out;
5973 if (sctx->cur_inode_last_extent <
5974 sctx->cur_inode_size) {
5975 ret = send_hole(sctx, sctx->cur_inode_size);
5976 if (ret)
5977 goto out;
5978 }
5979 }
5980 if (need_truncate) {
5981 ret = send_truncate(sctx, sctx->cur_ino,
5982 sctx->cur_inode_gen,
5983 sctx->cur_inode_size);
5984 if (ret < 0)
5985 goto out;
5986 }
5987 }
5989 if (need_chown) {
5990 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5991 left_uid, left_gid);
5992 if (ret < 0)
5993 goto out;
5994 }
5995 if (need_chmod) {
5996 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5997 left_mode);
5998 if (ret < 0)
5999 goto out;
6000 }
6002 ret = send_capabilities(sctx);
6003 if (ret < 0)
6004 goto out;
6006 /*
6007 * If other directory inodes depended on our current directory
6008 * inode's move/rename, now do their move/rename operations.
6009 */
6010 if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
6011 ret = apply_children_dir_moves(sctx);
6012 if (ret)
6013 goto out;
6014 /*
6015 * Need to send that every time, no matter if it actually
6016 * changed between the two trees as we have done changes to
6017 * the inode before. If our inode is a directory and it's
6018 * waiting to be moved/renamed, we will send its utimes when
6019 * it's moved/renamed, therefore we don't need to do it here.
6020 */
6021 sctx->send_progress = sctx->cur_ino + 1;
6022 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
6023 if (ret < 0)
6024 goto out;
6025 }
6027 out:
6028 return ret;
6029 }
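/*
 * Context for record_parent_ref: collects, via iterate_inode_ref, every
 * path the current inode has in the parent snapshot onto the given list so
 * that btrfs_unlink_all_paths can unlink them all.
 */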
6031 struct parent_paths_ctx {
6032 struct list_head *refs;
6033 struct send_ctx *sctx;
6034 };
6036 static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
6037 void *ctx)
6039 struct parent_paths_ctx *ppctx = ctx;
6041 return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
6042 ppctx->refs);
6043 }
6045 /*
6046 * Issue unlink operations for all paths of the current inode found in the
6047 * parent snapshot.
6048 */
6049 static int btrfs_unlink_all_paths(struct send_ctx *sctx)
6051 LIST_HEAD(deleted_refs);
6052 struct btrfs_path *path;
6053 struct btrfs_key key;
6054 struct parent_paths_ctx ctx;
6055 int ret;
6057 path = alloc_path_for_send();
6058 if (!path)
6059 return -ENOMEM;
6061 key.objectid = sctx->cur_ino;
6062 key.type = BTRFS_INODE_REF_KEY;
6063 key.offset = 0;
6064 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
6065 if (ret < 0)
6066 goto out;
6068 ctx.refs = &deleted_refs;
6069 ctx.sctx = sctx;
6071 while (true) {
6072 struct extent_buffer *eb = path->nodes[0];
6073 int slot = path->slots[0];
6075 if (slot >= btrfs_header_nritems(eb)) {
6076 ret = btrfs_next_leaf(sctx->parent_root, path);
6077 if (ret < 0)
6078 goto out;
6079 else if (ret > 0)
6080 break;
6081 continue;
6082 }
6084 btrfs_item_key_to_cpu(eb, &key, slot);
6085 if (key.objectid != sctx->cur_ino)
6086 break;
6087 if (key.type != BTRFS_INODE_REF_KEY &&
6088 key.type != BTRFS_INODE_EXTREF_KEY)
6089 break;
6091 ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
6092 record_parent_ref, &ctx);
6093 if (ret < 0)
6094 goto out;
6096 path->slots[0]++;
6097 }
6099 while (!list_empty(&deleted_refs)) {
6100 struct recorded_ref *ref;
6102 ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
6103 ret = send_unlink(sctx, ref->full_path);
6104 if (ret < 0)
6105 goto out;
6106 fs_path_free(ref->full_path);
6107 list_del(&ref->list);
6108 kfree(ref);
6109 }
6110 ret = 0;
6111 out:
6112 btrfs_free_path(path);
6113 if (ret)
6114 __free_recorded_refs(&deleted_refs);
6115 return ret;
6116 }
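/*
 * Handle a new/deleted/changed inode item: set up the per-inode state in
 * sctx and, when the inode was deleted and its number reused (generation
 * change), immediately process all of its refs, extents and xattrs.
 */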
6118 static int changed_inode(struct send_ctx *sctx,
6119 enum btrfs_compare_tree_result result)
6121 int ret = 0;
6122 struct btrfs_key *key = sctx->cmp_key;
6123 struct btrfs_inode_item *left_ii = NULL;
6124 struct btrfs_inode_item *right_ii = NULL;
6125 u64 left_gen = 0;
6126 u64 right_gen = 0;
6128 sctx->cur_ino = key->objectid;
6129 sctx->cur_inode_new_gen = 0;
6130 sctx->cur_inode_last_extent = (u64)-1;
6131 sctx->cur_inode_next_write_offset = 0;
6132 sctx->ignore_cur_inode = false;
6134 /*
6135 * Set send_progress to current inode. This will tell all get_cur_xxx
6136 * functions that the current inode's refs are not updated yet. Later,
6137 * when process_recorded_refs is finished, it is set to cur_ino + 1.
6138 */
6139 sctx->send_progress = sctx->cur_ino;
6141 if (result == BTRFS_COMPARE_TREE_NEW ||
6142 result == BTRFS_COMPARE_TREE_CHANGED) {
6143 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
6144 sctx->left_path->slots[0],
6145 struct btrfs_inode_item);
6146 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
6147 left_ii);
6148 } else {
6149 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
6150 sctx->right_path->slots[0],
6151 struct btrfs_inode_item);
6152 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
6153 right_ii);
6154 }
6155 if (result == BTRFS_COMPARE_TREE_CHANGED) {
6156 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
6157 sctx->right_path->slots[0],
6158 struct btrfs_inode_item);
6160 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
6161 right_ii);
6163 /*
6164 * The cur_ino = root dir case is special here. We can't treat
6165 * the inode as deleted+reused because it would generate a
6166 * stream that tries to delete/mkdir the root dir.
6167 */
6168 if (left_gen != right_gen &&
6169 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
6170 sctx->cur_inode_new_gen = 1;
6171 }
6173 /*
6174 * Normally we do not find inodes with a link count of zero (orphans)
6175 * because the most common case is to create a snapshot and use it
6176 * for a send operation. However other less common use cases involve
6177 * using a subvolume, sending it after turning it to RO mode just
6178 * after deleting all hard links of a file while holding an open
6179 * file descriptor against it, or turning a RO snapshot into RW mode,
6180 * keeping an open file descriptor against a file, deleting it and then
6181 * turning the snapshot back to RO mode before using it for a send
6182 * operation. So if we find such cases, ignore the inode and all its
6183 * items completely if it's a new inode, or if it's a changed inode
6184 * make sure all its previous paths (from the parent snapshot) are
6185 * unlinked and all the other inode items are ignored.
6186 */
6187 if (result == BTRFS_COMPARE_TREE_NEW ||
6188 result == BTRFS_COMPARE_TREE_CHANGED) {
6189 u32 nlinks;
6191 nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
6192 if (nlinks == 0) {
6193 sctx->ignore_cur_inode = true;
6194 if (result == BTRFS_COMPARE_TREE_CHANGED)
6195 ret = btrfs_unlink_all_paths(sctx);
6196 goto out;
6197 }
6198 }
6200 if (result == BTRFS_COMPARE_TREE_NEW) {
6201 sctx->cur_inode_gen = left_gen;
6202 sctx->cur_inode_new = 1;
6203 sctx->cur_inode_deleted = 0;
6204 sctx->cur_inode_size = btrfs_inode_size(
6205 sctx->left_path->nodes[0], left_ii);
6206 sctx->cur_inode_mode = btrfs_inode_mode(
6207 sctx->left_path->nodes[0], left_ii);
6208 sctx->cur_inode_rdev = btrfs_inode_rdev(
6209 sctx->left_path->nodes[0], left_ii);
6210 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
6211 ret = send_create_inode_if_needed(sctx);
6212 } else if (result == BTRFS_COMPARE_TREE_DELETED) {
6213 sctx->cur_inode_gen = right_gen;
6214 sctx->cur_inode_new = 0;
6215 sctx->cur_inode_deleted = 1;
6216 sctx->cur_inode_size = btrfs_inode_size(
6217 sctx->right_path->nodes[0], right_ii);
6218 sctx->cur_inode_mode = btrfs_inode_mode(
6219 sctx->right_path->nodes[0], right_ii);
6220 } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
6221 /*
6222 * We need to do some special handling in case the inode was
6223 * reported as changed with a changed generation number. This
6224 * means that the original inode was deleted and a new inode
6225 * reused the same inum. So we have to treat the old inode as
6226 * deleted and the new one as new.
6227 */
6228 if (sctx->cur_inode_new_gen) {
6229 /*
6230 * First, process the inode as if it was deleted.
6231 */
6232 sctx->cur_inode_gen = right_gen;
6233 sctx->cur_inode_new = 0;
6234 sctx->cur_inode_deleted = 1;
6235 sctx->cur_inode_size = btrfs_inode_size(
6236 sctx->right_path->nodes[0], right_ii);
6237 sctx->cur_inode_mode = btrfs_inode_mode(
6238 sctx->right_path->nodes[0], right_ii);
6239 ret = process_all_refs(sctx,
6240 BTRFS_COMPARE_TREE_DELETED);
6241 if (ret < 0)
6242 goto out;
6244 /*
6245 * Now process the inode as if it was new.
6246 */
6247 sctx->cur_inode_gen = left_gen;
6248 sctx->cur_inode_new = 1;
6249 sctx->cur_inode_deleted = 0;
6250 sctx->cur_inode_size = btrfs_inode_size(
6251 sctx->left_path->nodes[0], left_ii);
6252 sctx->cur_inode_mode = btrfs_inode_mode(
6253 sctx->left_path->nodes[0], left_ii);
6254 sctx->cur_inode_rdev = btrfs_inode_rdev(
6255 sctx->left_path->nodes[0], left_ii);
6256 ret = send_create_inode_if_needed(sctx);
6257 if (ret < 0)
6258 goto out;
6260 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
6261 if (ret < 0)
6262 goto out;
6263 /*
6264 * Advance send_progress now as we did not get into
6265 * process_recorded_refs_if_needed in the new_gen case.
6266 */
6267 sctx->send_progress = sctx->cur_ino + 1;
6269 /*
6270 * Now process all extents and xattrs of the inode as if
6271 * they were all new.
6272 */
6273 ret = process_all_extents(sctx);
6274 if (ret < 0)
6275 goto out;
6276 ret = process_all_new_xattrs(sctx);
6277 if (ret < 0)
6278 goto out;
6279 } else {
6280 sctx->cur_inode_gen = left_gen;
6281 sctx->cur_inode_new = 0;
6282 sctx->cur_inode_new_gen = 0;
6283 sctx->cur_inode_deleted = 0;
6284 sctx->cur_inode_size = btrfs_inode_size(
6285 sctx->left_path->nodes[0], left_ii);
6286 sctx->cur_inode_mode = btrfs_inode_mode(
6287 sctx->left_path->nodes[0], left_ii);
6288 }
6291 out:
6292 return ret;
6293 }
6295 /*
6296 * We have to process new refs before deleted refs, but compare_trees gives us
6297 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
6298 * first and later process them in process_recorded_refs.
6299 * For the cur_inode_new_gen case, we skip recording completely because
6300 * changed_inode already initiated processing of refs. The reason for this is
6301 * that in this case, compare_tree actually compares the refs of 2 different
6302 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
6303 * refs of the right tree as deleted and all refs of the left tree as new.
6304 */
6305 static int changed_ref(struct send_ctx *sctx,
6306 enum btrfs_compare_tree_result result)
6308 int ret = 0;
6310 if (sctx->cur_ino != sctx->cmp_key->objectid) {
6311 inconsistent_snapshot_error(sctx, result, "reference");
6312 return -EIO;
6313 }
6315 if (!sctx->cur_inode_new_gen &&
6316 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
6317 if (result == BTRFS_COMPARE_TREE_NEW)
6318 ret = record_new_ref(sctx);
6319 else if (result == BTRFS_COMPARE_TREE_DELETED)
6320 ret = record_deleted_ref(sctx);
6321 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6322 ret = record_changed_ref(sctx);
6323 }
6325 return ret;
6326 }
6328 /*
6329 * Process new/deleted/changed xattrs. We skip processing in the
6330 * cur_inode_new_gen case because changed_inode already initiated processing
6331 * of xattrs. The reason is the same as in changed_ref.
6332 */
6333 static int changed_xattr(struct send_ctx *sctx,
6334 enum btrfs_compare_tree_result result)
6336 int ret = 0;
6338 if (sctx->cur_ino != sctx->cmp_key->objectid) {
6339 inconsistent_snapshot_error(sctx, result, "xattr");
6340 return -EIO;
6341 }
6343 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6344 if (result == BTRFS_COMPARE_TREE_NEW)
6345 ret = process_new_xattr(sctx);
6346 else if (result == BTRFS_COMPARE_TREE_DELETED)
6347 ret = process_deleted_xattr(sctx);
6348 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6349 ret = process_changed_xattr(sctx);
6350 }
6352 return ret;
6353 }
6355 /*
6356 * Process new/deleted/changed extents. We skip processing in the
6357 * cur_inode_new_gen case because changed_inode already initiated processing
6358 * of extents. The reason is the same as in changed_ref.
6359 */
6360 static int changed_extent(struct send_ctx *sctx,
6361 enum btrfs_compare_tree_result result)
6363 int ret = 0;
6365 /*
6366 * We have found an extent item that changed without the inode item
6367 * having changed. This can happen either after relocation (where the
6368 * disk_bytenr of an extent item is replaced at
6369 * relocation.c:replace_file_extents()) or after deduplication into a
6370 * file in both the parent and send snapshots (where an extent item can
6371 * get modified or replaced with a new one). Note that deduplication
6372 * updates the inode item, but it only changes the iversion (sequence
6373 * field in the inode item) of the inode, so if a file is deduplicated
6374 * the same number of times in both the parent and send snapshots, its
6375 * iversion becomes the same in both snapshots, hence the inode item is
6376 * the same on both snapshots.
6377 */
6378 if (sctx->cur_ino != sctx->cmp_key->objectid)
6379 return 0;
6381 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6382 if (result != BTRFS_COMPARE_TREE_DELETED)
6383 ret = process_extent(sctx, sctx->left_path,
6384 sctx->cmp_key);
6385 }
6387 return ret;
6388 }
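/*
 * Return 1 if the generation of directory 'dir' differs between the send
 * and parent snapshots (i.e. the directory was deleted and re-created),
 * 0 if it is unchanged, or a negative error.
 */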
6390 static int dir_changed(struct send_ctx *sctx, u64 dir)
6392 u64 orig_gen, new_gen;
6393 int ret;
6395 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
6396 NULL, NULL);
6397 if (ret)
6398 return ret;
6400 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
6401 NULL, NULL, NULL);
6402 if (ret)
6403 return ret;
6405 return (orig_gen != new_gen) ? 1 : 0;
6406 }
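/*
 * For a ref item that compare_trees reported as unchanged, check whether
 * any parent directory it points to changed generation, in which case the
 * ref still has to be re-processed as changed.
 */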
6408 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
6409 struct btrfs_key *key)
6411 struct btrfs_inode_extref *extref;
6412 struct extent_buffer *leaf;
6413 u64 dirid = 0, last_dirid = 0;
6414 unsigned long ptr;
6415 u32 item_size;
6416 u32 cur_offset = 0;
6417 int ref_name_len;
6418 int ret = 0;
6420 /* Easy case, just check this one dirid */
6421 if (key->type == BTRFS_INODE_REF_KEY) {
6422 dirid = key->offset;
6424 ret = dir_changed(sctx, dirid);
6425 goto out;
6426 }
6428 leaf = path->nodes[0];
6429 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
6430 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
6431 while (cur_offset < item_size) {
6432 extref = (struct btrfs_inode_extref *)(ptr +
6433 cur_offset);
6434 dirid = btrfs_inode_extref_parent(leaf, extref);
6435 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
6436 cur_offset += ref_name_len + sizeof(*extref);
6437 if (dirid == last_dirid)
6438 continue;
6439 ret = dir_changed(sctx, dirid);
6440 if (ret)
6441 break;
6442 last_dirid = dirid;
6443 }
6444 out:
6445 return ret;
6446 }
6448 /*
6449 * Updates compare-related fields in sctx and simply forwards to the actual
6450 * changed_xxx functions.
6451 */
6452 static int changed_cb(struct btrfs_path *left_path,
6453 struct btrfs_path *right_path,
6454 struct btrfs_key *key,
6455 enum btrfs_compare_tree_result result,
6456 void *ctx)
6458 int ret = 0;
6459 struct send_ctx *sctx = ctx;
6461 if (result == BTRFS_COMPARE_TREE_SAME) {
6462 if (key->type == BTRFS_INODE_REF_KEY ||
6463 key->type == BTRFS_INODE_EXTREF_KEY) {
6464 ret = compare_refs(sctx, left_path, key);
6465 if (!ret)
6466 return 0;
6467 if (ret < 0)
6468 return ret;
6469 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
6470 return maybe_send_hole(sctx, left_path, key);
6471 } else {
6472 return 0;
6473 }
6474 result = BTRFS_COMPARE_TREE_CHANGED;
6475 ret = 0;
6476 }
6478 sctx->left_path = left_path;
6479 sctx->right_path = right_path;
6480 sctx->cmp_key = key;
6482 ret = finish_inode_if_needed(sctx, 0);
6483 if (ret < 0)
6484 goto out;
6486 /* Ignore non-FS objects */
6487 if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
6488 key->objectid == BTRFS_FREE_SPACE_OBJECTID)
6489 goto out;
6491 if (key->type == BTRFS_INODE_ITEM_KEY) {
6492 ret = changed_inode(sctx, result);
6493 } else if (!sctx->ignore_cur_inode) {
6494 if (key->type == BTRFS_INODE_REF_KEY ||
6495 key->type == BTRFS_INODE_EXTREF_KEY)
6496 ret = changed_ref(sctx, result);
6497 else if (key->type == BTRFS_XATTR_ITEM_KEY)
6498 ret = changed_xattr(sctx, result);
6499 else if (key->type == BTRFS_EXTENT_DATA_KEY)
6500 ret = changed_extent(sctx, result);
6501 }
6503 out:
6504 return ret;
6505 }
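/*
 * Full send: walk every item of the send root in key order and feed each
 * one to changed_cb as BTRFS_COMPARE_TREE_NEW, so the whole subvolume is
 * streamed as if it had no parent.
 */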
6507 static int full_send_tree(struct send_ctx *sctx)
6509 int ret;
6510 struct btrfs_root *send_root = sctx->send_root;
6511 struct btrfs_key key;
6512 struct btrfs_path *path;
6513 struct extent_buffer *eb;
6514 int slot;
6516 path = alloc_path_for_send();
6517 if (!path)
6518 return -ENOMEM;
6520 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6521 key.type = BTRFS_INODE_ITEM_KEY;
6522 key.offset = 0;
6524 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
6525 if (ret < 0)
6526 goto out;
6527 if (ret)
6528 goto out_finish;
6530 while (1) {
6531 eb = path->nodes[0];
6532 slot = path->slots[0];
6533 btrfs_item_key_to_cpu(eb, &key, slot);
6535 ret = changed_cb(path, NULL, &key,
6536 BTRFS_COMPARE_TREE_NEW, sctx);
6537 if (ret < 0)
6538 goto out;
6540 ret = btrfs_next_item(send_root, path);
6541 if (ret < 0)
6542 goto out;
6543 if (ret) {
6544 ret = 0;
6545 break;
6546 }
6547 }
6549 out_finish:
6550 ret = finish_inode_if_needed(sctx, 1);
6552 out:
6553 btrfs_free_path(path);
6554 return ret;
6555 }
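/*
 * Top level of a send operation: emit the stream header and the subvolume
 * begin command, then either diff against the parent root or do a full
 * send of the whole tree.
 */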
6557 static int send_subvol(struct send_ctx *sctx)
6559 int ret;
6561 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
6562 ret = send_header(sctx);
6563 if (ret < 0)
6564 goto out;
6565 }
6567 ret = send_subvol_begin(sctx);
6568 if (ret < 0)
6569 goto out;
6571 if (sctx->parent_root) {
6572 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
6573 changed_cb, sctx);
6574 if (ret < 0)
6575 goto out;
6576 ret = finish_inode_if_needed(sctx, 1);
6577 if (ret < 0)
6578 goto out;
6579 } else {
6580 ret = full_send_tree(sctx);
6581 if (ret < 0)
6582 goto out;
6583 }
6585 out:
6586 free_recorded_refs(sctx);
6587 return ret;
6588 }
6590 /*
6591 * If orphan cleanup did remove any orphans from a root, it means the tree
6592 * was modified and therefore the commit root is not the same as the current
6593 * root anymore. This is a problem, because send uses the commit root and
6594 * therefore can see inode items that don't exist in the current root anymore,
6595 * and for example make calls to btrfs_iget, which will do tree lookups based
6596 * on the current root and not on the commit root. Those lookups will fail,
6597 * returning a -ESTALE error, and making send fail with that error. So make
6598 * sure a send does not see any orphans we have just removed, and that it will
6599 * see the same inodes regardless of whether a transaction commit happened
6600 * before it started (meaning that the commit root will be the same as the
6601 * current root) or not.
6602 */
6603 static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
6605 int i;
6606 struct btrfs_trans_handle *trans = NULL;
6608 again:
6609 if (sctx->parent_root &&
6610 sctx->parent_root->node != sctx->parent_root->commit_root)
6611 goto commit_trans;
6613 for (i = 0; i < sctx->clone_roots_cnt; i++)
6614 if (sctx->clone_roots[i].root->node !=
6615 sctx->clone_roots[i].root->commit_root)
6616 goto commit_trans;
6618 if (trans)
6619 return btrfs_end_transaction(trans);
6621 return 0;
6623 commit_trans:
6624 /* Use any root, all fs roots will get their commit roots updated. */
6625 if (!trans) {
6626 trans = btrfs_join_transaction(sctx->send_root);
6627 if (IS_ERR(trans))
6628 return PTR_ERR(trans);
6629 goto again;
6630 }
6632 return btrfs_commit_transaction(trans);
6633 }
6635 /*
6636 * Make sure any existing delalloc is flushed for any root used by a send
6637 * operation so that we do not miss any data and we do not race with writeback
6638 * finishing and changing a tree while send is using the tree. This could
6639 * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
6640 * a send operation then uses the subvolume.
6641 * After flushing delalloc, ensure_commit_roots_uptodate() must be called.
6642 */
6643 static int flush_delalloc_roots(struct send_ctx *sctx)
6645 struct btrfs_root *root = sctx->parent_root;
6646 int ret;
6647 int i;
6649 if (root) {
6650 ret = btrfs_start_delalloc_snapshot(root);
6651 if (ret)
6652 return ret;
6653 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
6654 }
6656 for (i = 0; i < sctx->clone_roots_cnt; i++) {
6657 root = sctx->clone_roots[i].root;
6658 ret = btrfs_start_delalloc_snapshot(root);
6659 if (ret)
6660 return ret;
6661 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
6662 }
6664 return 0;
6665 }
6667 static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
6669 spin_lock(&root->root_item_lock);
6670 root->send_in_progress--;
6671 /*
6672 * Not much left to do, we don't know why it's unbalanced and
6673 * can't blindly reset it to 0.
6674 */
6675 if (root->send_in_progress < 0)
6676 btrfs_err(root->fs_info,
6677 "send_in_progress unbalanced %d root %llu",
6678 root->send_in_progress, root->root_key.objectid);
6679 spin_unlock(&root->root_item_lock);
6680 }
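/*
 * Entry point of the send ioctl: validates the arguments, sets up the send
 * context and clone sources, and streams the subvolume to the file
 * descriptor supplied by user space.
 */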

long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
{
	int ret = 0;
	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
	struct btrfs_fs_info *fs_info = send_root->fs_info;
	struct btrfs_root *clone_root;
	struct btrfs_key key;
	struct send_ctx *sctx = NULL;
	u32 i;
	u64 *clone_sources_tmp = NULL;
	int clone_sources_to_rollback = 0;
	unsigned alloc_size;
	int sort_clone_roots = 0;
	int index;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * The subvolume must remain read-only during send, protect against
	 * making it RW. This also protects against deletion.
	 */
	spin_lock(&send_root->root_item_lock);
	send_root->send_in_progress++;
	spin_unlock(&send_root->root_item_lock);

	/*
	 * Userspace tools do the checks and warn the user if it's
	 * not RO.
	 */
	if (!btrfs_root_readonly(send_root)) {
		ret = -EPERM;
		goto out;
	}

	/*
	 * Check that we don't overflow at later allocations: we request
	 * clone_sources_count + 1 items, and access_ok() compares the size
	 * against an unsigned long.
	 */
	if (arg->clone_sources_count >
	    ULONG_MAX / sizeof(struct clone_root) - 1) {
		ret = -EINVAL;
		goto out;
	}

	if (!access_ok(VERIFY_READ, arg->clone_sources,
			sizeof(*arg->clone_sources) *
			arg->clone_sources_count)) {
		ret = -EFAULT;
		goto out;
	}

	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
		ret = -EINVAL;
		goto out;
	}

	sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
	if (!sctx) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&sctx->new_refs);
	INIT_LIST_HEAD(&sctx->deleted_refs);
	INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
	INIT_LIST_HEAD(&sctx->name_cache_list);

	sctx->flags = arg->flags;

	sctx->send_filp = fget(arg->send_fd);
	if (!sctx->send_filp) {
		ret = -EBADF;
		goto out;
	}

	sctx->send_root = send_root;
	/*
	 * Unlikely but possible: if the subvolume is marked for deletion but
	 * is slow to remove the directory entry, send can still be started.
	 */
	if (btrfs_root_dead(sctx->send_root)) {
		ret = -EPERM;
		goto out;
	}

	sctx->clone_roots_cnt = arg->clone_sources_count;

	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
	if (!sctx->send_buf) {
		ret = -ENOMEM;
		goto out;
	}

	sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
	if (!sctx->read_buf) {
		ret = -ENOMEM;
		goto out;
	}

	sctx->pending_dir_moves = RB_ROOT;
	sctx->waiting_dir_moves = RB_ROOT;
	sctx->orphan_dirs = RB_ROOT;

	alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);

	sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
	if (!sctx->clone_roots) {
		ret = -ENOMEM;
		goto out;
	}

	alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);

	if (arg->clone_sources_count) {
		clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
		if (!clone_sources_tmp) {
			ret = -ENOMEM;
			goto out;
		}

		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
				alloc_size);
		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		for (i = 0; i < arg->clone_sources_count; i++) {
			key.objectid = clone_sources_tmp[i];
			key.type = BTRFS_ROOT_ITEM_KEY;
			key.offset = (u64)-1;

			index = srcu_read_lock(&fs_info->subvol_srcu);

			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
			if (IS_ERR(clone_root)) {
				srcu_read_unlock(&fs_info->subvol_srcu, index);
				ret = PTR_ERR(clone_root);
				goto out;
			}
			spin_lock(&clone_root->root_item_lock);
			if (!btrfs_root_readonly(clone_root) ||
			    btrfs_root_dead(clone_root)) {
				spin_unlock(&clone_root->root_item_lock);
				srcu_read_unlock(&fs_info->subvol_srcu, index);
				ret = -EPERM;
				goto out;
			}
			clone_root->send_in_progress++;
			spin_unlock(&clone_root->root_item_lock);
			srcu_read_unlock(&fs_info->subvol_srcu, index);

			sctx->clone_roots[i].root = clone_root;
			clone_sources_to_rollback = i + 1;
		}
		kvfree(clone_sources_tmp);
		clone_sources_tmp = NULL;
	}

	if (arg->parent_root) {
		key.objectid = arg->parent_root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		index = srcu_read_lock(&fs_info->subvol_srcu);

		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(sctx->parent_root)) {
			srcu_read_unlock(&fs_info->subvol_srcu, index);
			ret = PTR_ERR(sctx->parent_root);
			goto out;
		}

		spin_lock(&sctx->parent_root->root_item_lock);
		sctx->parent_root->send_in_progress++;
		if (!btrfs_root_readonly(sctx->parent_root) ||
		    btrfs_root_dead(sctx->parent_root)) {
			spin_unlock(&sctx->parent_root->root_item_lock);
			srcu_read_unlock(&fs_info->subvol_srcu, index);
			ret = -EPERM;
			goto out;
		}
		spin_unlock(&sctx->parent_root->root_item_lock);

		srcu_read_unlock(&fs_info->subvol_srcu, index);
	}

	/*
	 * Clones from send_root are allowed, but only if the clone source
	 * is behind the current send position. This is checked while searching
	 * for possible clone sources.
	 */
	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;

	/* We do a bsearch later */
	sort(sctx->clone_roots, sctx->clone_roots_cnt,
			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
			NULL);
	sort_clone_roots = 1;

	ret = flush_delalloc_roots(sctx);
	if (ret)
		goto out;

	ret = ensure_commit_roots_uptodate(sctx);
	if (ret)
		goto out;

	current->journal_info = BTRFS_SEND_TRANS_STUB;
	ret = send_subvol(sctx);
	current->journal_info = NULL;
	if (ret < 0)
		goto out;

	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
		if (ret < 0)
			goto out;
		ret = send_cmd(sctx);
		if (ret < 0)
			goto out;
	}

out:
	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
		struct rb_node *n;
		struct pending_dir_move *pm;

		n = rb_first(&sctx->pending_dir_moves);
		pm = rb_entry(n, struct pending_dir_move, node);
		while (!list_empty(&pm->list)) {
			struct pending_dir_move *pm2;

			pm2 = list_first_entry(&pm->list,
					       struct pending_dir_move, list);
			free_pending_move(sctx, pm2);
		}
		free_pending_move(sctx, pm);
	}

	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
		struct rb_node *n;
		struct waiting_dir_move *dm;

		n = rb_first(&sctx->waiting_dir_moves);
		dm = rb_entry(n, struct waiting_dir_move, node);
		rb_erase(&dm->node, &sctx->waiting_dir_moves);
		kfree(dm);
	}

	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
	while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
		struct rb_node *n;
		struct orphan_dir_info *odi;

		n = rb_first(&sctx->orphan_dirs);
		odi = rb_entry(n, struct orphan_dir_info, node);
		free_orphan_dir_info(sctx, odi);
	}

	if (sort_clone_roots) {
		for (i = 0; i < sctx->clone_roots_cnt; i++)
			btrfs_root_dec_send_in_progress(
					sctx->clone_roots[i].root);
	} else {
		for (i = 0; sctx && i < clone_sources_to_rollback; i++)
			btrfs_root_dec_send_in_progress(
					sctx->clone_roots[i].root);

		btrfs_root_dec_send_in_progress(send_root);
	}
	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
		btrfs_root_dec_send_in_progress(sctx->parent_root);

	kvfree(clone_sources_tmp);

	if (sctx) {
		if (sctx->send_filp)
			fput(sctx->send_filp);

		kvfree(sctx->clone_roots);
		kvfree(sctx->send_buf);
		kvfree(sctx->read_buf);

		name_cache_free(sctx);

		kfree(sctx);
	}

	return ret;
}
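
/*
 * Editorial usage sketch (not part of the original file; error handling
 * trimmed): how userspace might invoke this ioctl directly. "subvol_fd"
 * and "out_fd" are hypothetical descriptors, an O_RDONLY fd on the
 * read-only snapshot and an fd the stream is written to (pipe or file):
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_ioctl_send_args args = {};
 *
 *	args.send_fd = out_fd;         // stream destination
 *	args.parent_root = 0;          // or a parent subvol id (incremental)
 *	args.clone_sources = NULL;     // optional array of subvol ids
 *	args.clone_sources_count = 0;
 *	args.flags = 0;                // e.g. BTRFS_SEND_FLAG_NO_FILE_DATA
 *
 *	if (ioctl(subvol_fd, BTRFS_IOC_SEND, &args) < 0)
 *		perror("BTRFS_IOC_SEND");
 */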