Update NEWS for 1.6.22
[pkg-k5-afs_openafs.git] / src / vol / clone.c
blob3d20b8a7cbb5c638fc22bf520a1e5afc2b6d11f4
/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

/*
	System:		VICE-TWO
	Module:		clone.c

 */

/* Clone a volume.  Assumes the new volume is already created */
18 #include <afsconfig.h>
19 #include <afs/param.h>
22 #include <sys/types.h>
23 #include <stdio.h>
24 #include <afs/afs_assert.h>
25 #ifdef AFS_NT40_ENV
26 #include <fcntl.h>
27 #include <windows.h>
28 #include <winbase.h>
29 #include <io.h>
30 #include <time.h>
31 #else
32 #include <sys/file.h>
33 #include <sys/time.h>
34 #include <unistd.h>
35 #endif
36 #include <string.h>
37 #include <errno.h>
38 #include <sys/stat.h>
40 #include <rx/xdr.h>
41 #include <afs/afsint.h>
42 #include "nfs.h"
43 #include "lwp.h"
44 #include "lock.h"
45 #include <afs/afssyscalls.h>
46 #include "ihandle.h"
47 #include "vnode.h"
48 #include "volume.h"
49 #include "partition.h"
50 #include "viceinode.h"
51 #include "vol_prototypes.h"
52 #include "common.h"
/* Optional polling hook; initialised to a null pointer here, so some other
 * part of the program must assign it before it can do anything. */
int (*vol_PollProc) (void) = 0;	/* someone must init this */
/* Store `code` in the enclosing function's `error` variable and jump to its
 * `error_exit` label.  The enclosing function must provide both.  The
 * do { } while (0) wrapper makes the expansion a single statement, so the
 * macro is safe as the body of an unbraced `if`. */
#define ERROR_EXIT(code) do { \
    error = (code); \
    goto error_exit; \
} while (0)
61 /* parameters for idec call - this could just be an IHandle_t, but leaving
62 * open the possibility of decrementing the special files as well.
64 struct clone_rock {
65 IHandle_t *h;
66 VolId vol;
69 #define CLONE_MAXITEMS 100
70 struct clone_items {
71 struct clone_items *next;
72 afs_int32 nitems;
73 Inode data[CLONE_MAXITEMS];
/* Head of a singly linked list of clone_items chunks. */
struct clone_head {
    struct clone_items *first;	/* oldest chunk; traversal starts here */
    struct clone_items *last;	/* newest chunk; new items are added here */
};
81 void CloneVolume(Error *, Volume *, Volume *, Volume *);
83 static int
84 ci_AddItem(struct clone_head *ah, Inode aino)
86 struct clone_items *ti;
88 /* if no last elt (first call) or last item full, get a new one */
89 if ((!ah->last) || ah->last->nitems >= CLONE_MAXITEMS) {
90 ti = (struct clone_items *)malloc(sizeof(struct clone_items));
91 if (!ti) {
92 Log("ci_AddItem: malloc failed\n");
93 osi_Panic("ci_AddItem: malloc failed\n");
95 ti->nitems = 0;
96 ti->next = (struct clone_items *)0;
97 if (ah->last) {
98 ah->last->next = ti;
99 ah->last = ti;
100 } else {
101 /* first dude in the list */
102 ah->first = ah->last = ti;
104 } else
105 ti = ah->last;
107 /* now ti points to the end of the list, to a clone_item with room
108 * for at least one more element. Add it.
110 ti->data[ti->nitems++] = aino;
111 return 0;
114 /* initialize a clone header */
116 ci_InitHead(struct clone_head *ah)
118 memset(ah, 0, sizeof(*ah));
119 return 0;
122 /* apply a function to all dudes in the set */
124 ci_Apply(struct clone_head *ah, int (*aproc) (Inode, void *), void *arock)
126 struct clone_items *ti;
127 int i;
129 for (ti = ah->first; ti; ti = ti->next) {
130 for (i = 0; i < ti->nitems; i++) {
131 (*aproc) (ti->data[i], arock);
134 return 0;
137 /* free all dudes in the list */
139 ci_Destroy(struct clone_head *ah)
141 struct clone_items *ti, *ni;
143 for (ti = ah->first; ti; ti = ni) {
144 ni = ti->next; /* guard against freeing */
145 free(ti);
147 return 0;
150 static int
151 IDecProc(Inode adata, void *arock)
153 struct clone_rock *aparm = (struct clone_rock *)arock;
154 IH_DEC(aparm->h, adata, aparm->vol);
155 DOPOLL;
156 return 0;
159 afs_int32
160 DoCloneIndex(Volume * rwvp, Volume * clvp, VnodeClass class, int reclone)
162 afs_int32 code, error = 0;
163 FdHandle_t *rwFd = 0, *clFdIn = 0, *clFdOut = 0;
164 StreamHandle_t *rwfile = 0, *clfilein = 0, *clfileout = 0;
165 IHandle_t *rwH = 0, *clHin = 0, *clHout = 0;
166 char buf[SIZEOF_LARGEDISKVNODE], dbuf[SIZEOF_LARGEDISKVNODE];
167 struct VnodeDiskObject *rwvnode = (struct VnodeDiskObject *)buf;
168 struct VnodeDiskObject *clvnode = (struct VnodeDiskObject *)dbuf;
169 Inode rwinode = 0;
170 Inode clinode;
171 struct clone_head decHead;
172 struct clone_rock decRock;
173 afs_foff_t offset = 0;
174 afs_int32 dircloned, inodeinced;
175 afs_int32 filecount = 0, diskused = 0;
176 afs_ino_str_t stmp;
178 struct VnodeClassInfo *vcp = &VnodeClassInfo[class];
180 * The fileserver's -readonly switch should make this false, but we
181 * have no useful way to know in the volserver.
182 * This doesn't make client data mutable.
184 int ReadWriteOriginal = 1;
186 /* Correct number of files in volume: this assumes indexes are always
187 cloned starting with vLarge */
188 if (ReadWriteOriginal && class != vLarge) {
189 filecount = V_filecount(rwvp);
190 diskused = V_diskused(rwvp);
193 /* Initialize list of inodes to nuke - must do this before any calls
194 * to ERROR_EXIT, as the error handler requires an initialised list
196 ci_InitHead(&decHead);
197 decRock.h = V_linkHandle(rwvp);
198 decRock.vol = V_parentId(rwvp);
200 /* Open the RW volume's index file and seek to beginning */
201 IH_COPY(rwH, rwvp->vnodeIndex[class].handle);
202 rwFd = IH_OPEN(rwH);
203 if (!rwFd)
204 ERROR_EXIT(EIO);
205 rwfile = FDH_FDOPEN(rwFd, ReadWriteOriginal ? "r+" : "r");
206 if (!rwfile)
207 ERROR_EXIT(EIO);
208 STREAM_ASEEK(rwfile, vcp->diskSize); /* Will fail if no vnodes */
210 /* Open the clone volume's index file and seek to beginning */
211 IH_COPY(clHout, clvp->vnodeIndex[class].handle);
212 clFdOut = IH_OPEN(clHout);
213 if (!clFdOut)
214 ERROR_EXIT(EIO);
215 clfileout = FDH_FDOPEN(clFdOut, "a");
216 if (!clfileout)
217 ERROR_EXIT(EIO);
218 code = STREAM_ASEEK(clfileout, vcp->diskSize);
219 if (code)
220 ERROR_EXIT(EIO);
222 /* If recloning, open the new volume's index; this time for
223 * reading. We never read anything that we're simultaneously
224 * writing, so this all works.
226 if (reclone) {
227 IH_COPY(clHin, clvp->vnodeIndex[class].handle);
228 clFdIn = IH_OPEN(clHin);
229 if (!clFdIn)
230 ERROR_EXIT(EIO);
231 clfilein = FDH_FDOPEN(clFdIn, "r");
232 if (!clfilein)
233 ERROR_EXIT(EIO);
234 STREAM_ASEEK(clfilein, vcp->diskSize); /* Will fail if no vnodes */
237 /* Read each vnode in the old volume's index file */
238 for (offset = vcp->diskSize;
239 STREAM_READ(rwvnode, vcp->diskSize, 1, rwfile) == 1;
240 offset += vcp->diskSize) {
241 dircloned = inodeinced = 0;
243 /* If we are recloning the volume, read the corresponding vnode
244 * from the clone and determine its inode number.
246 if (reclone && !STREAM_EOF(clfilein)
247 && (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1)) {
248 clinode = VNDISK_GET_INO(clvnode);
249 } else {
250 clinode = 0;
253 if (rwvnode->type != vNull) {
254 afs_fsize_t ll;
256 if (rwvnode->vnodeMagic != vcp->magic)
257 ERROR_EXIT(-1);
258 rwinode = VNDISK_GET_INO(rwvnode);
259 filecount++;
260 VNDISK_GET_LEN(ll, rwvnode);
261 diskused += nBlocks(ll);
263 /* Increment the inode if not already */
264 if (clinode && (clinode == rwinode)) {
265 clinode = 0; /* already cloned - don't delete later */
266 } else if (rwinode) {
267 if (IH_INC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) ==
268 -1) {
269 Log("IH_INC failed: %"AFS_PTR_FMT", %s, %u errno %d\n",
270 V_linkHandle(rwvp), PrintInode(stmp, rwinode),
271 V_parentId(rwvp), errno);
272 VForceOffline(rwvp);
273 ERROR_EXIT(EIO);
275 inodeinced = 1;
278 /* If a directory, mark vnode in old volume as cloned */
279 if ((rwvnode->type == vDirectory) && ReadWriteOriginal) {
280 #ifdef DVINC
282 * It is my firmly held belief that immediately after
283 * copy-on-write, the two directories can be identical.
284 * If the new copy is changed (presumably, that is the very
285 * next thing that will happen) then the dataVersion will
286 * get bumped.
288 /* NOTE: the dataVersion++ is incredibly important!!!.
289 * This will cause the inode created by the file server
290 * on copy-on-write to be stamped with a dataVersion bigger
291 * than the current one. The salvager will then do the
292 * right thing */
293 rwvnode->dataVersion++;
294 #endif /* DVINC */
295 rwvnode->cloned = 1;
296 code = STREAM_ASEEK(rwfile, offset);
297 if (code == -1)
298 goto clonefailed;
299 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
300 if (code != 1)
301 goto clonefailed;
302 dircloned = 1;
303 code = STREAM_ASEEK(rwfile, offset + vcp->diskSize);
304 if (code == -1)
305 goto clonefailed;
306 #ifdef DVINC
307 rwvnode->dataVersion--; /* Really needs to be set to the value in the inode,
308 * for the read-only volume */
309 #endif /* DVINC */
313 /* Overwrite the vnode entry in the clone volume */
314 rwvnode->cloned = 0;
315 code = STREAM_WRITE(rwvnode, vcp->diskSize, 1, clfileout);
316 if (code != 1) {
317 clonefailed:
318 /* Couldn't clone, go back and decrement the inode's link count */
319 if (inodeinced) {
320 if (IH_DEC(V_linkHandle(rwvp), rwinode, V_parentId(rwvp)) ==
321 -1) {
322 Log("IH_DEC failed: %"AFS_PTR_FMT", %s, %u errno %d\n",
323 V_linkHandle(rwvp), PrintInode(stmp, rwinode),
324 V_parentId(rwvp), errno);
325 VForceOffline(rwvp);
326 ERROR_EXIT(EIO);
329 /* And if the directory was marked clone, unmark it */
330 if (dircloned) {
331 rwvnode->cloned = 0;
332 if (STREAM_ASEEK(rwfile, offset) != -1)
333 (void)STREAM_WRITE(rwvnode, vcp->diskSize, 1, rwfile);
335 ERROR_EXIT(EIO);
338 /* Removal of the old cloned inode */
339 if (clinode) {
340 ci_AddItem(&decHead, clinode); /* just queue it */
343 DOPOLL;
345 if (STREAM_ERROR(clfileout))
346 ERROR_EXIT(EIO);
348 /* Clean out any junk at end of clone file */
349 if (reclone) {
350 STREAM_ASEEK(clfilein, offset);
351 while (STREAM_READ(clvnode, vcp->diskSize, 1, clfilein) == 1) {
352 if (clvnode->type != vNull && VNDISK_GET_INO(clvnode) != 0) {
353 ci_AddItem(&decHead, VNDISK_GET_INO(clvnode));
355 DOPOLL;
359 /* come here to finish up. If code is non-zero, we've already run into problems,
360 * and shouldn't do the idecs.
362 error_exit:
363 if (rwfile)
364 STREAM_CLOSE(rwfile);
365 if (clfilein)
366 STREAM_CLOSE(clfilein);
367 if (clfileout)
368 STREAM_CLOSE(clfileout);
370 if (rwFd)
371 FDH_CLOSE(rwFd);
372 if (clFdIn)
373 FDH_CLOSE(clFdIn);
374 if (clFdOut)
375 FDH_CLOSE(clFdOut);
377 if (rwH)
378 IH_RELEASE(rwH);
379 if (clHout)
380 IH_RELEASE(clHout);
381 if (clHin)
382 IH_RELEASE(clHin);
384 /* Next, we sync the disk. We have to reopen in case we're truncating,
385 * since we were using stdio above, and don't know when the buffers
386 * would otherwise be flushed. There's no stdio fftruncate call.
388 rwFd = IH_OPEN(clvp->vnodeIndex[class].handle);
389 if (rwFd == NULL) {
390 if (!error)
391 error = EIO;
392 } else {
393 if (reclone) {
394 /* If doing a reclone, we're keeping the clone. We need to
395 * truncate the file to offset bytes.
397 if (reclone && !error) {
398 error = FDH_TRUNC(rwFd, offset);
401 FDH_SYNC(rwFd);
402 FDH_CLOSE(rwFd);
405 /* Now finally do the idec's. At this point, all potential
406 * references have been cleaned up and sent to the disk
407 * (see above fclose and fsync). No matter what happens, we
408 * no longer need to keep these references around.
410 code = ci_Apply(&decHead, IDecProc, (char *)&decRock);
411 if (!error)
412 error = code;
413 ci_Destroy(&decHead);
415 if (ReadWriteOriginal && filecount > 0)
416 V_filecount(rwvp) = filecount;
417 if (ReadWriteOriginal && diskused > 0)
418 V_diskused(rwvp) = diskused;
419 return error;
422 void
423 CloneVolume(Error * rerror, Volume * original, Volume * new, Volume * old)
425 afs_int32 code, error = 0;
426 afs_int32 reclone;
427 afs_int32 filecount = V_filecount(original), diskused = V_diskused(original);
429 *rerror = 0;
430 reclone = ((new == old) ? 1 : 0);
432 code = DoCloneIndex(original, new, vLarge, reclone);
433 if (code)
434 ERROR_EXIT(code);
435 code = DoCloneIndex(original, new, vSmall, reclone);
436 if (code)
437 ERROR_EXIT(code);
438 if (filecount != V_filecount(original) || diskused != V_diskused(original))
439 Log("Clone %u: filecount %d -> %d diskused %d -> %d\n",
440 V_id(original), filecount, V_filecount(original), diskused, V_diskused(original));
442 code = CopyVolumeHeader(&V_disk(original), &V_disk(new));
443 if (code)
444 ERROR_EXIT(code);
446 error_exit:
447 *rerror = error;