2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
16 /* Clone a volume. Assumes the new volume is already created */
18 #include <afsconfig.h>
19 #include <afs/param.h>
22 #include <sys/types.h>
24 #include <afs/afs_assert.h>
41 #include <afs/afsint.h>
45 #include <afs/afssyscalls.h>
49 #include "partition.h"
50 #include "viceinode.h"
51 #include "vol_prototypes.h"
// File-scope declarations used by the clone helpers below.
// vol_PollProc is defined here as a null function pointer; per the original
// note on the same statement, some other component is expected to assign it.
// NOTE(review): the body of ERROR_EXIT, the struct opening lines and the end
// of the block comment that follows the macro are not visible in this excerpt.
54 int (*vol_PollProc
) (void) = 0; /* someone must init this */
56 #define ERROR_EXIT(code) do { \
61 /* parameters for idec call - this could just be an IHandle_t, but leaving
62 * open the possibility of decrementing the special files as well.
// Capacity of the data array held by one clone_items node; ci_AddItem
// compares a node item count against this value to decide when to allocate.
69 #define CLONE_MAXITEMS 100
// Members of a list node: the link to the following node and the inode array.
71 struct clone_items
*next
;
73 Inode data
[CLONE_MAXITEMS
];
// Head and tail links of the list; ci_AddItem sets both and ci_Apply and
// ci_Destroy start their walks from first.
77 struct clone_items
*first
;
78 struct clone_items
*last
;
// Forward declaration of CloneVolume, which is defined later in the file.
81 void CloneVolume(Error
*, Volume
*, Volume
*, Volume
*);
// ci_AddItem: append inode aino to the list headed by ah.
//   ah   - list head whose first and last links are read and updated
//   aino - inode value stored in the next free slot of the tail node
// A fresh clone_items node is allocated when the list has no last node or
// when the last node already holds CLONE_MAXITEMS entries. The visible Log
// and osi_Panic calls both report a failed malloc.
// NOTE(review): the return type, braces, the allocation check, the branch
// that links a new node behind an existing tail, and the return statement
// are not visible in this excerpt - confirm against the full file.
84 ci_AddItem(struct clone_head
*ah
, Inode aino
)
86 struct clone_items
*ti
;
88 /* if no last elt (first call) or last item full, get a new one */
89 if ((!ah
->last
) || ah
->last
->nitems
>= CLONE_MAXITEMS
) {
90 ti
= (struct clone_items
*)malloc(sizeof(struct clone_items
));
92 Log("ci_AddItem: malloc failed\n");
93 osi_Panic("ci_AddItem: malloc failed\n");
// The next link of the new node is cleared before the node is attached.
96 ti
->next
= (struct clone_items
*)0;
101 /* first dude in the list */
102 ah
->first
= ah
->last
= ti
;
107 /* now ti points to the end of the list, to a clone_item with room
108 * for at least one more element. Add it.
// Store the inode at index nitems and post-increment the node item count.
110 ti
->data
[ti
->nitems
++] = aino
;
114 /* initialize a clone header */
// ci_InitHead: zero-fill the clone_head that ah points to.
//   ah - list head to reset; sizeof(*ah) bytes are overwritten with 0
// DoCloneIndex calls this before queueing any inode with ci_AddItem.
// NOTE(review): the return type, braces and return statement are not visible
// in this excerpt.
116 ci_InitHead(struct clone_head
*ah
)
118 memset(ah
, 0, sizeof(*ah
));
122 /* apply a function to all dudes in the set */
// ci_Apply: invoke aproc once for every inode stored in the list.
//   ah    - list head; the walk starts at ah->first and follows next links
//   aproc - callback taking an inode and an opaque pointer
//   arock - opaque pointer handed to every aproc call unchanged
// Nodes are visited in list order and, within a node, slots 0 up to nitems-1.
// In the visible code the value returned by aproc is discarded.
// NOTE(review): the return type, the declaration of i, closing braces and the
// return statement are not visible in this excerpt.
124 ci_Apply(struct clone_head
*ah
, int (*aproc
) (Inode
, void *), void *arock
)
126 struct clone_items
*ti
;
129 for (ti
= ah
->first
; ti
; ti
= ti
->next
) {
130 for (i
= 0; i
< ti
->nitems
; i
++) {
131 (*aproc
) (ti
->data
[i
], arock
);
137 /* free all dudes in the list */
// ci_Destroy: walk every node of the list starting at ah->first.
//   ah - list head whose nodes are visited
// The next link is copied into ni before the current node is dealt with, and
// the loop advances through ni rather than through ti, so the walk never
// reads a node after the loop body has finished with it.
// NOTE(review): the statement that releases ti, the braces and the return
// are not visible in this excerpt; the original comment implies a free.
139 ci_Destroy(struct clone_head
*ah
)
141 struct clone_items
*ti
, *ni
;
143 for (ti
= ah
->first
; ti
; ti
= ni
) {
144 ni
= ti
->next
; /* guard against freeing */
// IDecProc: callback that DoCloneIndex passes to ci_Apply.
//   adata - inode taken from the queued list
//   arock - opaque pointer, treated here as a struct clone_rock
// Calls IH_DEC with the h and vol members of the rock and the given inode.
// The result of IH_DEC is not examined in the visible code.
// NOTE(review): the return type, braces and any statements after IH_DEC are
// not visible in this excerpt.
151 IDecProc(Inode adata
, void *arock
)
153 struct clone_rock
*aparm
= (struct clone_rock
*)arock
;
154 IH_DEC(aparm
->h
, adata
, aparm
->vol
);
// DoCloneIndex: copy one vnode index of the read-write volume into the
// matching index of the clone volume.
//   rwvp    - read-write volume whose index (selected by class) is read
//   clvp    - clone volume whose index of the same class is written
//   class   - vnode class; selects the VnodeClassInfo entry and index handles
//   reclone - when non-zero, the existing clone index is also read so that an
//             inode already shared with rwvp is not incremented again and an
//             inode the clone no longer needs is queued for a later decrement
// Inodes queued in decHead are handed to IDecProc through ci_Apply near the
// end, after the streams have been closed.
// NOTE(review): the return type, braces, several conditions and the error
// exits are not visible in this excerpt; the comments added here describe
// only statements that are visible.
160 DoCloneIndex(Volume
* rwvp
, Volume
* clvp
, VnodeClass
class, int reclone
)
162 afs_int32 code
, error
= 0;
163 FdHandle_t
*rwFd
= 0, *clFdIn
= 0, *clFdOut
= 0;
164 StreamHandle_t
*rwfile
= 0, *clfilein
= 0, *clfileout
= 0;
165 IHandle_t
*rwH
= 0, *clHin
= 0, *clHout
= 0;
166 char buf
[SIZEOF_LARGEDISKVNODE
], dbuf
[SIZEOF_LARGEDISKVNODE
];
167 struct VnodeDiskObject
*rwvnode
= (struct VnodeDiskObject
*)buf
;
168 struct VnodeDiskObject
*clvnode
= (struct VnodeDiskObject
*)dbuf
;
171 struct clone_head decHead
;
172 struct clone_rock decRock
;
173 afs_foff_t offset
= 0;
174 afs_int32 dircloned
, inodeinced
;
175 afs_int32 filecount
= 0, diskused
= 0;
178 struct VnodeClassInfo
*vcp
= &VnodeClassInfo
[class];
180 * The fileserver's -readonly switch should make this false, but we
181 * have no useful way to know in the volserver.
182 * This doesn't make client data mutable.
184 int ReadWriteOriginal
= 1;
186 /* Correct number of files in volume: this assumes indexes are always
187 cloned starting with vLarge */
// For any class other than vLarge the running totals start from the counts
// already stored in rwvp; for vLarge they keep their initial value of 0.
188 if (ReadWriteOriginal
&& class != vLarge
) {
189 filecount
= V_filecount(rwvp
);
190 diskused
= V_diskused(rwvp
);
193 /* Initialize list of inodes to nuke - must do this before any calls
194 * to ERROR_EXIT, as the error handler requires an initialised list
196 ci_InitHead(&decHead
);
// decRock is the argument later given to IDecProc: the link handle and the
// parent id of rwvp.
197 decRock
.h
= V_linkHandle(rwvp
);
198 decRock
.vol
= V_parentId(rwvp
);
200 /* Open the RW volume's index file and seek to beginning */
201 IH_COPY(rwH
, rwvp
->vnodeIndex
[class].handle
);
// ReadWriteOriginal is fixed at 1 above, so this stream is opened in r+ mode;
// the directory branch of the loop below writes vnodes back through it.
205 rwfile
= FDH_FDOPEN(rwFd
, ReadWriteOriginal
? "r+" : "r");
208 STREAM_ASEEK(rwfile
, vcp
->diskSize
); /* Will fail if no vnodes */
210 /* Open the clone volume's index file and seek to beginning */
211 IH_COPY(clHout
, clvp
->vnodeIndex
[class].handle
);
212 clFdOut
= IH_OPEN(clHout
);
215 clfileout
= FDH_FDOPEN(clFdOut
, "a");
218 code
= STREAM_ASEEK(clfileout
, vcp
->diskSize
);
222 /* If recloning, open the new volume's index; this time for
223 * reading. We never read anything that we're simultaneously
224 * writing, so this all works.
227 IH_COPY(clHin
, clvp
->vnodeIndex
[class].handle
);
228 clFdIn
= IH_OPEN(clHin
);
231 clfilein
= FDH_FDOPEN(clFdIn
, "r");
234 STREAM_ASEEK(clfilein
, vcp
->diskSize
); /* Will fail if no vnodes */
237 /* Read each vnode in the old volume's index file */
// offset starts at diskSize, the same position the streams were seeked to
// above, and advances by diskSize per record read from rwfile.
238 for (offset
= vcp
->diskSize
;
239 STREAM_READ(rwvnode
, vcp
->diskSize
, 1, rwfile
) == 1;
240 offset
+= vcp
->diskSize
) {
241 dircloned
= inodeinced
= 0;
243 /* If we are recloning the volume, read the corresponding vnode
244 * from the clone and determine its inode number.
246 if (reclone
&& !STREAM_EOF(clfilein
)
247 && (STREAM_READ(clvnode
, vcp
->diskSize
, 1, clfilein
) == 1)) {
248 clinode
= VNDISK_GET_INO(clvnode
);
253 if (rwvnode
->type
!= vNull
) {
256 if (rwvnode
->vnodeMagic
!= vcp
->magic
)
258 rwinode
= VNDISK_GET_INO(rwvnode
);
260 VNDISK_GET_LEN(ll
, rwvnode
);
// Add the block count for the length just read from rwvnode to the total.
261 diskused
+= nBlocks(ll
);
263 /* Increment the inode if not already */
264 if (clinode
&& (clinode
== rwinode
)) {
265 clinode
= 0; /* already cloned - don't delete later */
266 } else if (rwinode
) {
267 if (IH_INC(V_linkHandle(rwvp
), rwinode
, V_parentId(rwvp
)) ==
269 Log("IH_INC failed: %"AFS_PTR_FMT
", %s, %u errno %d\n",
270 V_linkHandle(rwvp
), PrintInode(stmp
, rwinode
),
271 V_parentId(rwvp
), errno
);
278 /* If a directory, mark vnode in old volume as cloned */
279 if ((rwvnode
->type
== vDirectory
) && ReadWriteOriginal
) {
282 * It is my firmly held belief that immediately after
283 * copy-on-write, the two directories can be identical.
284 * If the new copy is changed (presumably, that is the very
285 * next thing that will happen) then the dataVersion will
288 /* NOTE: the dataVersion++ is incredibly important!!!.
289 * This will cause the inode created by the file server
290 * on copy-on-write to be stamped with a dataVersion bigger
291 * than the current one. The salvager will then do the
293 rwvnode
->dataVersion
++;
// Seek back to the slot of this vnode, rewrite it in the read-write index,
// then reposition the stream at the slot that follows.
296 code
= STREAM_ASEEK(rwfile
, offset
);
299 code
= STREAM_WRITE(rwvnode
, vcp
->diskSize
, 1, rwfile
);
303 code
= STREAM_ASEEK(rwfile
, offset
+ vcp
->diskSize
);
// Undo the increment in the in-memory copy, so the record written to the
// clone index below carries the earlier dataVersion.
307 rwvnode
->dataVersion
--; /* Really needs to be set to the value in the inode,
308 * for the read-only volume */
313 /* Overwrite the vnode entry in the clone volume */
315 code
= STREAM_WRITE(rwvnode
, vcp
->diskSize
, 1, clfileout
);
318 /* Couldn't clone, go back and decrement the inode's link count */
320 if (IH_DEC(V_linkHandle(rwvp
), rwinode
, V_parentId(rwvp
)) ==
322 Log("IH_DEC failed: %"AFS_PTR_FMT
", %s, %u errno %d\n",
323 V_linkHandle(rwvp
), PrintInode(stmp
, rwinode
),
324 V_parentId(rwvp
), errno
);
329 /* And if the directory was marked clone, unmark it */
332 if (STREAM_ASEEK(rwfile
, offset
) != -1)
333 (void)STREAM_WRITE(rwvnode
, vcp
->diskSize
, 1, rwfile
);
338 /* Removal of the old cloned inode */
340 ci_AddItem(&decHead
, clinode
); /* just queue it */
345 if (STREAM_ERROR(clfileout
))
348 /* Clean out any junk at end of clone file */
// Position the clone input stream at offset, then queue the inode of every
// remaining record whose type is not vNull and whose inode is non-zero.
350 STREAM_ASEEK(clfilein
, offset
);
351 while (STREAM_READ(clvnode
, vcp
->diskSize
, 1, clfilein
) == 1) {
352 if (clvnode
->type
!= vNull
&& VNDISK_GET_INO(clvnode
) != 0) {
353 ci_AddItem(&decHead
, VNDISK_GET_INO(clvnode
));
359 /* come here to finish up. If code is non-zero, we've already run into problems,
360 * and shouldn't do the idecs.
364 STREAM_CLOSE(rwfile
);
366 STREAM_CLOSE(clfilein
);
368 STREAM_CLOSE(clfileout
);
384 /* Next, we sync the disk. We have to reopen in case we're truncating,
385 * since we were using stdio above, and don't know when the buffers
386 * would otherwise be flushed. There's no stdio fftruncate call.
// NOTE(review): rwFd is reused here for the index handle of the clone volume
// (clvp), not of the read-write volume.
388 rwFd
= IH_OPEN(clvp
->vnodeIndex
[class].handle
);
394 /* If doing a reclone, we're keeping the clone. We need to
395 * truncate the file to offset bytes.
397 if (reclone
&& !error
) {
398 error
= FDH_TRUNC(rwFd
, offset
);
405 /* Now finally do the idec's. At this point, all potential
406 * references have been cleaned up and sent to the disk
407 * (see above fclose and fsync). No matter what happens, we
408 * no longer need to keep these references around.
410 code
= ci_Apply(&decHead
, IDecProc
, (char *)&decRock
);
413 ci_Destroy(&decHead
);
// The totals are stored back into rwvp only when they are greater than zero.
415 if (ReadWriteOriginal
&& filecount
> 0)
416 V_filecount(rwvp
) = filecount
;
417 if (ReadWriteOriginal
&& diskused
> 0)
418 V_diskused(rwvp
) = diskused
;
423 CloneVolume(Error
* rerror
, Volume
* original
, Volume
* new, Volume
* old
)
425 afs_int32 code
, error
= 0;
427 afs_int32 filecount
= V_filecount(original
), diskused
= V_diskused(original
);
430 reclone
= ((new == old
) ? 1 : 0);
432 code
= DoCloneIndex(original
, new, vLarge
, reclone
);
435 code
= DoCloneIndex(original
, new, vSmall
, reclone
);
438 if (filecount
!= V_filecount(original
) || diskused
!= V_diskused(original
))
439 Log("Clone %u: filecount %d -> %d diskused %d -> %d\n",
440 V_id(original
), filecount
, V_filecount(original
), diskused
, V_diskused(original
));
442 code
= CopyVolumeHeader(&V_disk(original
), &V_disk(new));