/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
static char *dmu_recv_tag = "dmu_recv_tag";

struct backuparg {
	dmu_replay_record_t *drr;
	vnode_t *vp;
	offset_t *off;
	objset_t *os;
	zio_cksum_t zc;
	int err;
};
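/*
 * Write a block of stream data to the target vnode, folding it into the
 * running fletcher-4 checksum as it goes out; ba->err and *ba->off record
 * the result for the caller.
 */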
static int
dump_bytes(struct backuparg *ba, void *buf, int len)
{
	ssize_t resid; /* have to get resid to get detailed errno */
	ASSERT3U(len % 8, ==, 0);

	fletcher_4_incremental_native(buf, len, &ba->zc);
	ba->err = vn_rdwr(UIO_WRITE, ba->vp,
	    (caddr_t)buf, len,
	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
	*ba->off += len;
	return (ba->err);
}
static int
dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
    uint64_t length)
{
	/* write a FREE record */
	bzero(ba->drr, sizeof (dmu_replay_record_t));
	ba->drr->drr_type = DRR_FREE;
	ba->drr->drr_u.drr_free.drr_object = object;
	ba->drr->drr_u.drr_free.drr_offset = offset;
	ba->drr->drr_u.drr_free.drr_length = length;

	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
		return (EINTR);
	return (0);
}
static int
dump_data(struct backuparg *ba, dmu_object_type_t type,
    uint64_t object, uint64_t offset, int blksz, void *data)
{
	/* write a DATA record */
	bzero(ba->drr, sizeof (dmu_replay_record_t));
	ba->drr->drr_type = DRR_WRITE;
	ba->drr->drr_u.drr_write.drr_object = object;
	ba->drr->drr_u.drr_write.drr_type = type;
	ba->drr->drr_u.drr_write.drr_offset = offset;
	ba->drr->drr_u.drr_write.drr_length = blksz;

	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
		return (EINTR);
	if (dump_bytes(ba, data, blksz))
		return (EINTR);
	return (0);
}
static int
dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
{
	/* write a FREEOBJECTS record */
	bzero(ba->drr, sizeof (dmu_replay_record_t));
	ba->drr->drr_type = DRR_FREEOBJECTS;
	ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj;
	ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs;

	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
		return (EINTR);
	return (0);
}
static int
dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
{
	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
		return (dump_freeobjects(ba, object, 1));

	/* write an OBJECT record */
	bzero(ba->drr, sizeof (dmu_replay_record_t));
	ba->drr->drr_type = DRR_OBJECT;
	ba->drr->drr_u.drr_object.drr_object = object;
	ba->drr->drr_u.drr_object.drr_type = dnp->dn_type;
	ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype;
	ba->drr->drr_u.drr_object.drr_blksz =
	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
	ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen;
	ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum;
	ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress;

	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
		return (EINTR);

	/* dump the bonus buffer, rounded up to an 8-byte boundary */
	if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)))
		return (EINTR);

	/* free anything past the end of the file */
	if (dump_free(ba, object, (dnp->dn_maxblkid + 1) *
	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
		return (EINTR);
	return (0);
}
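/*
 * BP_SPAN computes how many bytes of the object a single block pointer
 * at the given indirection level covers.  For example, with 128K data
 * blocks (dn_datablkszsec == 256) and 16K indirect blocks
 * (dn_indblkshift == 14, 128-byte blkptrs, SPA_BLKPTRSHIFT == 7), a
 * level-1 blkptr spans 128K << 7 == 16M of the object.
 */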
#define	BP_SPAN(dnp, level) \
	(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
	(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
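/*
 * Traversal callback: each visited blkptr becomes stream records.  A hole
 * in the meta-dnode becomes a FREEOBJECTS record, any other hole becomes
 * a FREE record, a dnode block becomes one OBJECT record per dnode it
 * holds, and a level-0 data block becomes a WRITE record.  Indirect
 * blocks and the objset block itself are not sent.
 */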
static int
backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct backuparg *ba = arg;
	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
	int err = 0;

	if (issig(JUSTLOOKING) && issig(FORREAL))
		return (EINTR);

	if (bp == NULL && zb->zb_object == 0) {
		uint64_t span = BP_SPAN(dnp, zb->zb_level);
		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
		err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
	} else if (bp == NULL) {
		uint64_t span = BP_SPAN(dnp, zb->zb_level);
		err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
		return (0);
	} else if (type == DMU_OT_DNODE) {
		dnode_phys_t *blk;
		int i;
		int blksz = BP_GET_LSIZE(bp);
		uint32_t aflags = ARC_WAIT;
		arc_buf_t *abuf;

		if (arc_read_nolock(NULL, spa, bp,
		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
			return (EIO);

		blk = abuf->b_data;
		for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
			uint64_t dnobj = (zb->zb_blkid <<
			    (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
			err = dump_dnode(ba, dnobj, blk+i);
			if (err)
				break;
		}
		(void) arc_buf_remove_ref(abuf, &abuf);
	} else { /* it's a level-0 block of a regular object */
		uint32_t aflags = ARC_WAIT;
		arc_buf_t *abuf;
		int blksz = BP_GET_LSIZE(bp);

		if (arc_read_nolock(NULL, spa, bp,
		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
			return (EIO);

		err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
		    blksz, abuf->b_data);
		(void) arc_buf_remove_ref(abuf, &abuf);
	}

	ASSERT(err == 0 || err == EINTR);
	return (err);
}
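/*
 * Generate a full or incremental send stream: a BEGIN record describing
 * the snapshot, the records produced by traversing the dataset from
 * fromtxg onward, and an END record carrying the accumulated checksum.
 */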
int
dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
    vnode_t *vp, offset_t *off)
{
	dsl_dataset_t *ds = tosnap->os->os_dsl_dataset;
	dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL;
	dmu_replay_record_t *drr;
	struct backuparg ba;
	int err;
	uint64_t fromtxg = 0;

	/* tosnap must be a snapshot */
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (EINVAL);

	/* fromsnap must be an earlier snapshot from the same fs as tosnap */
	if (fromds && (ds->ds_dir != fromds->ds_dir ||
	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
		return (EXDEV);

	if (fromorigin) {
		dsl_pool_t *dp = ds->ds_dir->dd_pool;

		if (fromsnap)
			return (EINVAL);

		if (dsl_dir_is_clone(ds->ds_dir)) {
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			err = dsl_dataset_hold_obj(dp,
			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
			rw_exit(&dp->dp_config_rwlock);
			if (err)
				return (err);
		} else {
			fromorigin = B_FALSE;
		}
	}

	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
	drr->drr_type = DRR_BEGIN;
	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
	drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION;
	drr->drr_u.drr_begin.drr_creation_time =
	    ds->ds_phys->ds_creation_time;
	drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type;
	if (fromorigin)
		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;

	if (fromds)
		drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
	dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);

	if (fromds)
		fromtxg = fromds->ds_phys->ds_creation_txg;
	if (fromorigin)
		dsl_dataset_rele(fromds, FTAG);

	ba.drr = drr;
	ba.vp = vp;
	ba.os = tosnap;
	ba.off = off;
	ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);

	if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) {
		kmem_free(drr, sizeof (dmu_replay_record_t));
		return (ba.err);
	}

	err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
	    backup_cb, &ba);

	if (err) {
		if (err == EINTR && ba.err)
			err = ba.err;
		kmem_free(drr, sizeof (dmu_replay_record_t));
		return (err);
	}

	bzero(drr, sizeof (dmu_replay_record_t));
	drr->drr_type = DRR_END;
	drr->drr_u.drr_end.drr_checksum = ba.zc;

	if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) {
		kmem_free(drr, sizeof (dmu_replay_record_t));
		return (ba.err);
	}

	kmem_free(drr, sizeof (dmu_replay_record_t));

	return (0);
}
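/*
 * The receive-begin work happens as dsl sync tasks: each variant pairs a
 * check function, run to validate the request, with a sync function that
 * creates or marks the dataset being received into.  recvbeginsyncarg
 * carries the request into those callbacks and returns the resulting ds.
 */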
struct recvbeginsyncarg {
	const char *tofs;
	const char *tosnap;
	dsl_dataset_t *origin;
	uint64_t fromguid;
	dmu_objset_type_t type;
	void *tag;
	boolean_t force;
	uint64_t dsflags;
	char clonelastname[MAXNAMELEN];
	dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
};
static dsl_dataset_t *
recv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type,
    cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds;

	/* This should always work, since we just created it */
	/* XXX - create should return an owned ds */
	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
	    DS_MODE_INCONSISTENT, dmu_recv_tag, &ds));

	if (type != DMU_OST_NONE) {
		(void) dmu_objset_create_impl(dp->dp_spa,
		    ds, &ds->ds_phys->ds_bp, type, tx);
	}

	spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
	    dp->dp_spa, tx, cr, "dataset = %lld", dsobj);

	return (ds);
}
/* ARGSUSED */
static int
recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	uint64_t val;
	int err;

	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
	    strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
	if (err != ENOENT)
		return (err ? err : EEXIST);

	if (rbsa->origin) {
		/* make sure it's a snap in the same pool */
		if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
			return (EXDEV);
		if (rbsa->origin->ds_phys->ds_num_children == 0)
			return (EINVAL);
		if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
			return (ENODEV);
	}

	return (0);
}
static void
recv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
	uint64_t dsobj;

	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
	    rbsa->origin, flags, cr, tx);

	rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
	    rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
}
static int
recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	int err;

	/* must be a head ds */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/* must not be a clone ds */
	if (dsl_dir_is_clone(ds->ds_dir))
		return (EINVAL);

	err = dsl_dataset_destroy_check(ds, rbsa->tag, tx);
	if (err)
		return (err);

	if (rbsa->origin) {
		/* make sure it's a snap in the same pool */
		if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool)
			return (EXDEV);
		if (rbsa->origin->ds_phys->ds_num_children == 0)
			return (EINVAL);
		if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
			return (ENODEV);
	}

	return (0);
}
static void
recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
	uint64_t dsobj;

	/*
	 * NB: caller must provide an extra hold on the dsl_dir_t, so it
	 * won't go away when dsl_dataset_destroy_sync() closes the
	 * dataset.
	 */
	dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx);

	dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx);

	rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
	    rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
}
/* ARGSUSED */
static int
recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	int err;
	uint64_t val;

	/* must not have any changes since most recent snapshot */
	if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
		return (ETXTBSY);

	/* must already be a snapshot of this fs */
	if (ds->ds_phys->ds_prev_snap_obj == 0)
		return (ENODEV);

	/* most recent snapshot must match fromguid */
	if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid)
		return (ENODEV);

	/* temporary clone name must not exist */
	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_dir->dd_phys->dd_child_dir_zapobj,
	    rbsa->clonelastname, 8, 1, &val);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/* new snapshot name must not exist */
	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);
	return (0);
}
/* ARGSUSED */
static void
recv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ohds = arg1;
	struct recvbeginsyncarg *rbsa = arg2;
	dsl_pool_t *dp = ohds->ds_dir->dd_pool;
	dsl_dataset_t *ods, *cds;
	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
	uint64_t dsobj;

	/* create the temporary clone */
	VERIFY(0 == dsl_dataset_hold_obj(dp, ohds->ds_phys->ds_prev_snap_obj,
	    FTAG, &ods));
	dsobj = dsl_dataset_create_sync(ohds->ds_dir,
	    rbsa->clonelastname, ods, flags, cr, tx);
	dsl_dataset_rele(ods, FTAG);

	/* open the temporary clone */
	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
	    DS_MODE_INCONSISTENT, dmu_recv_tag, &cds));

	/* copy the refquota from the target fs to the clone */
	if (ohds->ds_quota > 0)
		dsl_dataset_set_quota_sync(cds, &ohds->ds_quota, cr, tx);

	rbsa->ds = cds;

	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
	    dp->dp_spa, tx, cr, "dataset = %lld", dsobj);
}
/* ARGSUSED */
static void
recv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld",
	    ds->ds_object);
}
/*
 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
 * succeeds; otherwise we will leak the holds on the datasets.
 */
int
dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
    boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *drc)
{
	int err = 0;
	boolean_t byteswap;
	struct recvbeginsyncarg rbsa;
	uint64_t version;
	int flags;
	dsl_dataset_t *ds;

	if (drrb->drr_magic == DMU_BACKUP_MAGIC)
		byteswap = FALSE;
	else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
		byteswap = TRUE;
	else
		return (EINVAL);

	rbsa.tofs = tofs;
	rbsa.tosnap = tosnap;
	rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL;
	rbsa.fromguid = drrb->drr_fromguid;
	rbsa.type = drrb->drr_type;
	rbsa.tag = FTAG;
	rbsa.dsflags = 0;
	version = drrb->drr_version;
	flags = drrb->drr_flags;

	if (byteswap) {
		rbsa.type = BSWAP_32(rbsa.type);
		rbsa.fromguid = BSWAP_64(rbsa.fromguid);
		version = BSWAP_64(version);
		flags = BSWAP_32(flags);
	}

	if (version != DMU_BACKUP_STREAM_VERSION ||
	    rbsa.type >= DMU_OST_NUMTYPES ||
	    ((flags & DRR_FLAG_CLONE) && origin == NULL))
		return (EINVAL);

	if (flags & DRR_FLAG_CI_DATA)
		rbsa.dsflags = DS_FLAG_CI_DATASET;

	bzero(drc, sizeof (dmu_recv_cookie_t));
	drc->drc_drrb = drrb;
	drc->drc_tosnap = tosnap;
	drc->drc_force = force;

	/*
	 * Process the begin in syncing context.
	 */
	if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE) && !online) {
		/* offline incremental receive */
		err = dsl_dataset_own(tofs, 0, dmu_recv_tag, &ds);
		if (err)
			return (err);

		/*
		 * Only do the rollback if the most recent snapshot
		 * matches the incremental source
		 */
		if (force) {
			if (ds->ds_prev == NULL ||
			    ds->ds_prev->ds_phys->ds_guid !=
			    rbsa.fromguid) {
				dsl_dataset_disown(ds, dmu_recv_tag);
				return (ENODEV);
			}
			(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
		}
		rbsa.force = B_FALSE;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    recv_incremental_check,
		    recv_offline_incremental_sync, ds, &rbsa, 1);
		if (err) {
			dsl_dataset_disown(ds, dmu_recv_tag);
			return (err);
		}
		drc->drc_logical_ds = drc->drc_real_ds = ds;
	} else if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) {
		/* online incremental receive */

		/* tmp clone name is: tofs/%tosnap" */
		(void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
		    "%%%s", tosnap);

		/* open the dataset we are logically receiving into */
		err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
		if (err)
			return (err);

		rbsa.force = force;
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    recv_incremental_check,
		    recv_online_incremental_sync, ds, &rbsa, 5);
		if (err) {
			dsl_dataset_rele(ds, dmu_recv_tag);
			return (err);
		}
		drc->drc_logical_ds = ds;
		drc->drc_real_ds = rbsa.ds;
	} else {
		/* create new fs -- full backup or clone */
		dsl_dir_t *dd = NULL;
		const char *tail;

		err = dsl_dir_open(tofs, FTAG, &dd, &tail);
		if (err)
			return (err);
		if (tail == NULL) {
			if (!force) {
				dsl_dir_close(dd, FTAG);
				return (EEXIST);
			}

			rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
			err = dsl_dataset_own_obj(dd->dd_pool,
			    dd->dd_phys->dd_head_dataset_obj,
			    DS_MODE_INCONSISTENT, FTAG, &ds);
			rw_exit(&dd->dd_pool->dp_config_rwlock);
			if (err) {
				dsl_dir_close(dd, FTAG);
				return (err);
			}

			dsl_dataset_make_exclusive(ds, FTAG);
			err = dsl_sync_task_do(dd->dd_pool,
			    recv_full_existing_check,
			    recv_full_existing_sync, ds, &rbsa, 5);
			dsl_dataset_disown(ds, FTAG);
		} else {
			err = dsl_sync_task_do(dd->dd_pool, recv_full_check,
			    recv_full_sync, dd, &rbsa, 5);
		}
		dsl_dir_close(dd, FTAG);
		if (err)
			return (err);
		drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
		drc->drc_newfs = B_TRUE;
	}

	return (0);
}
struct restorearg {
	int err;
	int byteswap;
	vnode_t *vp;
	char *buf;
	uint64_t voff;
	int bufsize; /* amount of memory allocated for buf */
	zio_cksum_t cksum;
};
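/*
 * Read the next len bytes of the stream into ra->buf, retrying short
 * reads, advancing ra->voff, and folding the data into the running
 * fletcher-4 checksum (byteswapped if the stream is opposite-endian).
 */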
static void *
restore_read(struct restorearg *ra, int len)
{
	void *rv;
	int done = 0;

	/* some things will require 8-byte alignment, so everything must */
	ASSERT3U(len % 8, ==, 0);

	while (done < len) {
		ssize_t resid;

		ra->err = vn_rdwr(UIO_READ, ra->vp,
		    (caddr_t)ra->buf + done, len - done,
		    ra->voff, UIO_SYSSPACE, FAPPEND,
		    RLIM64_INFINITY, CRED(), &resid);

		/* a read of zero bytes means premature end of stream */
		if (resid == len - done)
			ra->err = EINVAL;
		ra->voff += len - done - resid;
		done = len - resid;
		if (ra->err)
			return (NULL);
	}

	ASSERT3U(done, ==, len);
	rv = ra->buf;
	if (ra->byteswap)
		fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
	else
		fletcher_4_incremental_native(rv, len, &ra->cksum);
	return (rv);
}
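/*
 * Byteswap an incoming replay record in place.  Despite the name, this
 * runs on the receive side, when the stream was generated on a host of
 * the opposite endianness.
 */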
static void
backup_byteswap(dmu_replay_record_t *drr)
{
#define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
#define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
	drr->drr_type = BSWAP_32(drr->drr_type);
	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
	switch (drr->drr_type) {
	case DRR_BEGIN:
		DO64(drr_begin.drr_magic);
		DO64(drr_begin.drr_version);
		DO64(drr_begin.drr_creation_time);
		DO32(drr_begin.drr_type);
		DO32(drr_begin.drr_flags);
		DO64(drr_begin.drr_toguid);
		DO64(drr_begin.drr_fromguid);
		break;
	case DRR_OBJECT:
		DO64(drr_object.drr_object);
		/* DO64(drr_object.drr_allocation_txg); */
		DO32(drr_object.drr_type);
		DO32(drr_object.drr_bonustype);
		DO32(drr_object.drr_blksz);
		DO32(drr_object.drr_bonuslen);
		break;
	case DRR_FREEOBJECTS:
		DO64(drr_freeobjects.drr_firstobj);
		DO64(drr_freeobjects.drr_numobjs);
		break;
	case DRR_WRITE:
		DO64(drr_write.drr_object);
		DO32(drr_write.drr_type);
		DO64(drr_write.drr_offset);
		DO64(drr_write.drr_length);
		break;
	case DRR_FREE:
		DO64(drr_free.drr_object);
		DO64(drr_free.drr_offset);
		DO64(drr_free.drr_length);
		break;
	case DRR_END:
		DO64(drr_end.drr_checksum.zc_word[0]);
		DO64(drr_end.drr_checksum.zc_word[1]);
		DO64(drr_end.drr_checksum.zc_word[2]);
		DO64(drr_end.drr_checksum.zc_word[3]);
		break;
	}
#undef DO64
#undef DO32
}
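/*
 * The restore_* handlers below apply one stream record apiece: OBJECT
 * records allocate or reshape dnodes, FREEOBJECTS and FREE records punch
 * holes, and WRITE records land data, each inside its own transaction.
 */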
static int
restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
{
	int err = 0;
	dmu_tx_t *tx;
	void *data = NULL;

	err = dmu_object_info(os, drro->drr_object, NULL);

	if (err != 0 && err != ENOENT)
		return (EINVAL);

	if (drro->drr_type == DMU_OT_NONE ||
	    drro->drr_type >= DMU_OT_NUMTYPES ||
	    drro->drr_bonustype >= DMU_OT_NUMTYPES ||
	    drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS ||
	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
	    drro->drr_blksz > SPA_MAXBLOCKSIZE ||
	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
		return (EINVAL);
	}

	if (drro->drr_bonuslen) {
		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
		if (data == NULL)
			return (ra->err);
	}

	tx = dmu_tx_create(os);

	if (err == ENOENT) {
		/* currently free, want to be allocated */
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			dmu_tx_abort(tx);
			return (err);
		}
		err = dmu_object_claim(os, drro->drr_object,
		    drro->drr_type, drro->drr_blksz,
		    drro->drr_bonustype, drro->drr_bonuslen, tx);
	} else {
		/* currently allocated, want to be allocated */
		dmu_tx_hold_bonus(tx, drro->drr_object);
		/*
		 * We may change blocksize, so need to
		 * hold_write
		 */
		dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			dmu_tx_abort(tx);
			return (err);
		}

		err = dmu_object_reclaim(os, drro->drr_object,
		    drro->drr_type, drro->drr_blksz,
		    drro->drr_bonustype, drro->drr_bonuslen, tx);
	}
	if (err) {
		dmu_tx_commit(tx);
		return (EINVAL);
	}

	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);

	if (data != NULL) {
		dmu_buf_t *db;

		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
		dmu_buf_will_dirty(db, tx);

		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
		bcopy(data, db->db_data, drro->drr_bonuslen);
		if (ra->byteswap) {
			dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
			    drro->drr_bonuslen);
		}
		dmu_buf_rele(db, FTAG);
	}
	dmu_tx_commit(tx);
	return (0);
}
/* ARGSUSED */
static int
restore_freeobjects(struct restorearg *ra, objset_t *os,
    struct drr_freeobjects *drrfo)
{
	uint64_t obj;

	if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
		return (EINVAL);

	for (obj = drrfo->drr_firstobj;
	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
	    (void) dmu_object_next(os, &obj, FALSE, 0)) {
		int err;

		if (dmu_object_info(os, obj, NULL) != 0)
			continue;

		err = dmu_free_object(os, obj);
		if (err)
			return (err);
	}
	return (0);
}
static int
restore_write(struct restorearg *ra, objset_t *os,
    struct drr_write *drrw)
{
	dmu_tx_t *tx;
	void *data;
	int err;

	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
	    drrw->drr_type >= DMU_OT_NUMTYPES)
		return (EINVAL);

	data = restore_read(ra, drrw->drr_length);
	if (data == NULL)
		return (ra->err);

	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
		return (EINVAL);

	tx = dmu_tx_create(os);

	dmu_tx_hold_write(tx, drrw->drr_object,
	    drrw->drr_offset, drrw->drr_length);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}
	if (ra->byteswap)
		dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
	dmu_write(os, drrw->drr_object,
	    drrw->drr_offset, drrw->drr_length, data, tx);
	dmu_tx_commit(tx);
	return (0);
}
/* ARGSUSED */
static int
restore_free(struct restorearg *ra, objset_t *os,
    struct drr_free *drrf)
{
	int err;

	if (drrf->drr_length != -1ULL &&
	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
		return (EINVAL);

	if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
		return (EINVAL);

	err = dmu_free_long_range(os, drrf->drr_object,
	    drrf->drr_offset, drrf->drr_length);
	return (err);
}
static void
dmu_recv_abort_cleanup(dmu_recv_cookie_t *drc)
{
	if (drc->drc_newfs || drc->drc_real_ds != drc->drc_logical_ds) {
		/*
		 * online incremental or new fs: destroy the fs (which
		 * may be a clone) that we created
		 */
		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
		if (drc->drc_real_ds != drc->drc_logical_ds)
			dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
	} else {
		/*
		 * offline incremental: rollback to most recent snapshot.
		 */
		(void) dsl_dataset_rollback(drc->drc_real_ds, DMU_OST_NONE);
		dsl_dataset_disown(drc->drc_real_ds, dmu_recv_tag);
	}
}
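/*
 * Consume the stream: fold the BEGIN record into the running checksum,
 * then read and apply records until a DRR_END record (whose stored
 * checksum must match everything that preceded it) or an error ends
 * the loop.
 */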
/*
 * NB: callers *must* call dmu_recv_end() if this succeeds.
 */
int
dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp)
{
	struct restorearg ra = { 0 };
	dmu_replay_record_t *drr;
	objset_t *os;
	zio_cksum_t pcksum;

	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
		ra.byteswap = TRUE;

	{
		/* compute checksum of drr_begin record */
		dmu_replay_record_t *drr;
		drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);

		drr->drr_type = DRR_BEGIN;
		drr->drr_u.drr_begin = *drc->drc_drrb;
		if (ra.byteswap) {
			fletcher_4_incremental_byteswap(drr,
			    sizeof (dmu_replay_record_t), &ra.cksum);
		} else {
			fletcher_4_incremental_native(drr,
			    sizeof (dmu_replay_record_t), &ra.cksum);
		}
		kmem_free(drr, sizeof (dmu_replay_record_t));
	}

	if (ra.byteswap) {
		struct drr_begin *drrb = drc->drc_drrb;
		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
		drrb->drr_version = BSWAP_64(drrb->drr_version);
		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
		drrb->drr_type = BSWAP_32(drrb->drr_type);
		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
	}

	ra.vp = vp;
	ra.voff = *voffp;
	ra.bufsize = 1<<20;
	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);

	/* these were verified in dmu_recv_begin */
	ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION);
	ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);

	/*
	 * Open the objset we are modifying.
	 */
	VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0);

	ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);

	/*
	 * Read records and process them.
	 */
	pcksum = ra.cksum;
	while (ra.err == 0 &&
	    NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
		if (issig(JUSTLOOKING) && issig(FORREAL)) {
			ra.err = EINTR;
			goto out;
		}

		if (ra.byteswap)
			backup_byteswap(drr);

		switch (drr->drr_type) {
		case DRR_OBJECT:
		{
			/*
			 * We need to make a copy of the record header,
			 * because restore_{object,write} may need to
			 * restore_read(), which will invalidate drr.
			 */
			struct drr_object drro = drr->drr_u.drr_object;
			ra.err = restore_object(&ra, os, &drro);
			break;
		}
		case DRR_FREEOBJECTS:
		{
			struct drr_freeobjects drrfo =
			    drr->drr_u.drr_freeobjects;
			ra.err = restore_freeobjects(&ra, os, &drrfo);
			break;
		}
		case DRR_WRITE:
		{
			struct drr_write drrw = drr->drr_u.drr_write;
			ra.err = restore_write(&ra, os, &drrw);
			break;
		}
		case DRR_FREE:
		{
			struct drr_free drrf = drr->drr_u.drr_free;
			ra.err = restore_free(&ra, os, &drrf);
			break;
		}
		case DRR_END:
		{
			struct drr_end drre = drr->drr_u.drr_end;
			/*
			 * We compare against the *previous* checksum
			 * value, because the stored checksum is of
			 * everything before the DRR_END record.
			 */
			if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
				ra.err = ECKSUM;
			goto out;
		}
		default:
			ra.err = EINVAL;
			goto out;
		}
		pcksum = ra.cksum;
	}
	ASSERT(ra.err != 0);

out:
	dmu_objset_close(os);

	if (ra.err != 0) {
		/*
		 * rollback or destroy what we created, so we don't
		 * leave it in the restoring state.
		 */
		txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
		dmu_recv_abort_cleanup(drc);
	}

	kmem_free(ra.buf, ra.bufsize);
	*voffp = ra.voff;
	return (ra.err);
}
struct recvendsyncarg {
	char *tosnap;
	uint64_t creation_time;
	uint64_t toguid;
};
static int
recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	struct recvendsyncarg *resa = arg2;

	return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
}
static void
recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	struct recvendsyncarg *resa = arg2;

	dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx);

	/* set snapshot's creation time and guid */
	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
	ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
	ds->ds_prev->ds_phys->ds_guid = resa->toguid;
	ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
}
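/*
 * Finalize the receive: for an online receive, swap the temporary
 * clone's contents into the target filesystem and destroy the clone;
 * then snapshot the result as tosnap and clear the inconsistent flag.
 */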
int
dmu_recv_end(dmu_recv_cookie_t *drc)
{
	struct recvendsyncarg resa;
	dsl_dataset_t *ds = drc->drc_logical_ds;
	int err;

	/*
	 * XXX hack; seems the ds is still dirty and
	 * dsl_pool_zil_clean() expects it to have a ds_user_ptr
	 * (and zil), but clone_swap() can close it.
	 */
	txg_wait_synced(ds->ds_dir->dd_pool, 0);

	if (ds != drc->drc_real_ds) {
		/* we are doing an online recv */
		if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
			err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
			    drc->drc_force);
			if (err)
				dsl_dataset_disown(ds, dmu_recv_tag);
		} else {
			err = EBUSY;
			dsl_dataset_rele(ds, dmu_recv_tag);
		}
		/* dsl_dataset_destroy() will disown the ds */
		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
		if (err)
			return (err);
	}

	resa.creation_time = drc->drc_drrb->drr_creation_time;
	resa.toguid = drc->drc_drrb->drr_toguid;
	resa.tosnap = drc->drc_tosnap;

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    recv_end_check, recv_end_sync, ds, &resa, 3);
	if (err) {
		if (drc->drc_newfs) {
			ASSERT(ds == drc->drc_real_ds);
			(void) dsl_dataset_destroy(ds, dmu_recv_tag);
			return (err);
		} else {
			(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
		}
	}

	/* release the hold from dmu_recv_begin */
	dsl_dataset_disown(ds, dmu_recv_tag);
	return (err);
}