2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
12 * Module: vol-salvage.c
13 * Institution: The Information Technology Center, Carnegie-Mellon University
17 Correct handling of bad "." and ".." entries.
18 Message if volume has "destroyMe" flag set--but doesn't delete yet.
19 Link count bug fixed--bug was that vnodeEssence link count was unsigned
20 14 bits. Needs to be signed.
23 Change to DirHandle stuff to make sure that cache entries are reused at the
24 right time (this parallels the file server change, but is not identical).
26 Added calls to directory salvager routines; doesn't salvage dir unless debug=1.
29 Fixed bug which was causing inode link counts to go bad (thus leaking
31 Vnodes with 0 inode pointers in RW volumes are now deleted.
32 An inode with a matching inode number to the vnode is preferred to an
33 inode with a higher data version.
34 Bug is probably fixed that was causing data version to remain wrong,
35 despite assurances from the salvager to the contrary.
38 Added limited salvaging: unless ForceSalvage is on, then the volume will
39 not be salvaged if the dontSalvage flag is set in the Volume Header.
40 The ForceSalvage flag is turned on if an individual volume is salvaged or
41 if the file FORCESALVAGE exists in the partition header of the file system
42 being salvaged. This isn't used for anything but could be set by vfsck.
43 A -f flag was also added to force salvage.
46 It now deletes obsolete volume inodes without complaining
49 Repairs rw volume headers (again).
52 Correlates volume headers & inodes correctly, thus preventing occasional deletion
53 of read-only volumes...
54 No longer forces a directory salvage for volume 144 (which may be a good volume
56 Some of the messages are cleaned up or made more explicit. One or two added.
58 A bug was fixed which forced salvage of read-only volumes without a corresponding
62 When a volume header is recreated, the new name will be "bogus.volume#"
65 Directory salvaging turned on!!!
68 Prints warning messages for setuid programs.
71 Logs missing inode numbers.
74 Increments directory version number by 200 (rather than by 1) when it is salvaged, in order to prevent problems due to the fact that a version number can be promised to a workstation before it is written to disk. If the server crashes, it may have an older version. Salvaging it could bring the version number up to the same version the workstation believed it already had a call back on.
77 Locks the file /vice/vol/salvage.lock before starting. Aborts if it can't acquire the lock.
78 Time stamps on log entries.
79 Fcntl on stdout to cause all entries to be appended.
80 Problems writing to temporary files are now all detected.
81 Inode summary files are now dynamically named (so that multiple salvagers wouldn't conflict).
82 Some cleanup of error messages.
86 #include <afsconfig.h>
87 #include <afs/param.h>
91 #include <sys/param.h>
95 #endif /* ITIMER_REAL */
101 #include <sys/stat.h>
106 #include <WINNT/afsevent.h>
109 #define WCOREDUMP(x) ((x) & 0200)
112 #include <afs/afsint.h>
113 #include <afs/afs_assert.h>
114 #if !defined(AFS_SGI_ENV) && !defined(AFS_NT40_ENV)
115 #if defined(AFS_VFSINCL_ENV)
116 #include <sys/vnode.h>
118 #include <sys/fs/ufs_inode.h>
120 #if defined(AFS_DARWIN_ENV) || defined(AFS_XBSD_ENV)
121 #include <ufs/ufs/dinode.h>
122 #include <ufs/ffs/fs.h>
124 #include <ufs/inode.h>
127 #else /* AFS_VFSINCL_ENV */
129 #include <ufs/inode.h>
130 #else /* AFS_OSF_ENV */
131 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_XBSD_ENV) && !defined(AFS_DARWIN_ENV)
132 #include <sys/inode.h>
135 #endif /* AFS_VFSINCL_ENV */
136 #endif /* AFS_SGI_ENV */
139 #include <sys/lockf.h>
143 #include <checklist.h>
145 #if defined(AFS_SGI_ENV)
150 #if defined(AFS_SUN_ENV) || defined(AFS_SUN5_ENV)
153 #include <sys/mnttab.h>
154 #include <sys/mntent.h>
159 #endif /* AFS_SGI_ENV */
160 #endif /* AFS_HPUX_ENV */
165 #include <afs/osi_inode.h>
169 #include <afs/afsutil.h>
170 #include <afs/fileutil.h>
171 #include <afs/procmgmt.h> /* signal(), kill(), wait(), etc. */
179 #include <afs/afssyscalls.h>
183 #include "partition.h"
184 #include "daemon_com.h"
185 #include "daemon_com_inline.h"
187 #include "fssync_inline.h"
188 #include "volume_inline.h"
189 #include "salvsync.h"
190 #include "viceinode.h"
192 #include "volinodes.h" /* header magic number, etc. stuff */
193 #include "vol-salvage.h"
195 #include "vol_internal.h"
197 #include <afs/prs_fs.h>
199 #ifdef FSSYNC_BUILD_CLIENT
200 #include "vg_cache.h"
207 /*@+fcnmacros +macrofcndecl@*/
210 extern off64_t
afs_lseek(int FD
, off64_t O
, int F
);
211 #endif /*S_SPLINT_S */
212 #define afs_lseek(FD, O, F) lseek64(FD, (off64_t) (O), F)
213 #define afs_stat stat64
214 #define afs_fstat fstat64
215 #define afs_open open64
216 #define afs_fopen fopen64
217 #else /* !O_LARGEFILE */
219 extern off_t
afs_lseek(int FD
, off_t O
, int F
);
220 #endif /*S_SPLINT_S */
221 #define afs_lseek(FD, O, F) lseek(FD, (off_t) (O), F)
222 #define afs_stat stat
223 #define afs_fstat fstat
224 #define afs_open open
225 #define afs_fopen fopen
226 #endif /* !O_LARGEFILE */
227 /*@=fcnmacros =macrofcndecl@*/
230 extern void *calloc();
232 static char *TimeStamp(time_t clock
, int precision
);
235 int debug
; /* -d flag */
236 extern int Testing
; /* -n flag */
237 int ListInodeOption
; /* -i flag */
238 int ShowRootFiles
; /* -r flag */
239 int RebuildDirs
; /* -sal flag */
240 int Parallel
= 4; /* -para X flag */
241 int PartsPerDisk
= 8; /* Salvage up to 8 partitions on same disk sequentially */
242 int forceR
= 0; /* -b flag */
243 int ShowLog
= 0; /* -showlog flag */
244 int ShowSuid
= 0; /* -showsuid flag */
245 int ShowMounts
= 0; /* -showmounts flag */
246 int orphans
= ORPH_IGNORE
; /* -orphans option */
251 int useSyslog
= 0; /* -syslog flag */
252 int useSyslogFacility
= LOG_DAEMON
; /* -syslogfacility option */
261 #define MAXPARALLEL 32
263 int OKToZap
; /* -o flag */
264 int ForceSalvage
; /* If salvage should occur despite the DONT_SALVAGE flag
265 * in the volume header */
267 FILE *logFile
= 0; /* one of {/usr/afs/logs,/vice/file}/SalvageLog */
269 #define ROOTINODE 2 /* Root inode of a 4.2 Unix file system
272 * information that is 'global' to a particular salvage job.
275 Device fileSysDevice
; /**< The device number of the current partition
277 char fileSysPath
[9]; /**< The path of the mounted partition currently
278 * being salvaged, i.e. the directory containing
279 * the volume headers */
280 char *fileSysPathName
; /**< NT needs this to make name pretty log. */
281 IHandle_t
*VGLinkH
; /**< Link handle for current volume group. */
282 int VGLinkH_cnt
; /**< # of references to lnk handle. */
283 struct DiskPartition64
*fileSysPartition
; /**< Partition being salvaged */
286 char *fileSysDeviceName
; /**< The block device where the file system being
287 * salvaged was mounted */
288 char *filesysfulldev
;
290 int VolumeChanged
; /**< Set by any routine which would change the
291 * volume in a way which would require callbacks
292 * to be broken if the volume was put back on
293 * on line by an active file server */
295 VolumeDiskData VolInfo
; /**< A copy of the last good or salvaged volume
296 * header dealt with */
298 int nVolumesInInodeFile
; /**< Number of read-write volumes summarized */
299 int inodeFd
; /**< File descriptor for inode file */
301 struct VolumeSummary
*volumeSummaryp
; /**< Holds all the volumes in a part */
302 int nVolumes
; /**< Number of volumes (read-write and read-only)
303 * in volume summary */
304 struct InodeSummary
*inodeSummary
; /**< contains info on all the relevant
307 struct VnodeInfo vnodeInfo
[nVNODECLASSES
]; /**< contains info on all of the
308 * vnodes in the volume that
309 * we are currently looking
311 int useFSYNC
; /**< 0 if the fileserver is unavailable; 1 if we should try
312 * to contact the fileserver over FSYNC */
319 /* Forward declarations */
320 static int IsVnodeOrphaned(struct SalvInfo
*salvinfo
, VnodeId vnode
);
321 static int AskVolumeSummary(struct SalvInfo
*salvinfo
,
322 VolumeId singleVolumeNumber
);
323 static void MaybeAskOnline(struct SalvInfo
*salvinfo
, VolumeId volumeId
);
324 static void AskError(struct SalvInfo
*salvinfo
, VolumeId volumeId
);
326 #ifdef AFS_DEMAND_ATTACH_FS
327 static int LockVolume(struct SalvInfo
*salvinfo
, VolumeId volumeId
);
328 #endif /* AFS_DEMAND_ATTACH_FS */
330 /* Uniquifier stored in the Inode */
335 return (u
& 0x3fffff);
337 #if defined(AFS_SGI_EXMAG)
338 return (u
& SGI_UNIQMASK
);
341 #endif /* AFS_SGI_EXMAG */
348 if (aerror
== EPERM
|| aerror
== ENXIO
|| aerror
== ENOENT
)
350 return 0; /* otherwise may be transient, e.g. EMFILE */
355 char *save_args
[MAX_ARGS
];
357 extern pthread_t main_thread
;
358 childJob_t myjob
= { SALVAGER_MAGIC
, NOT_CHILD
, "" };
362 * Get the salvage lock if not already held. Hold until process exits.
364 * @param[in] locktype READ_LOCK or WRITE_LOCK
367 _ObtainSalvageLock(int locktype
)
369 struct VLockFile salvageLock
;
374 VLockFileInit(&salvageLock
, AFSDIR_SERVER_SLVGLOCK_FILEPATH
);
376 code
= VLockFileLock(&salvageLock
, offset
, locktype
, nonblock
);
379 "salvager: There appears to be another salvager running! "
384 "salvager: Error %d trying to acquire salvage lock! "
390 ObtainSalvageLock(void)
392 _ObtainSalvageLock(WRITE_LOCK
);
395 ObtainSharedSalvageLock(void)
397 _ObtainSalvageLock(READ_LOCK
);
401 #ifdef AFS_SGI_XFS_IOPS_ENV
402 /* Check if the given partition is mounted. For XFS, the root inode is not a
403 * constant. So we check the hard way.
406 IsPartitionMounted(char *part
)
409 struct mntent
*mntent
;
411 osi_Assert(mntfp
= setmntent(MOUNTED
, "r"));
412 while (mntent
= getmntent(mntfp
)) {
413 if (!strcmp(part
, mntent
->mnt_dir
))
418 return mntent
? 1 : 1;
421 /* Check if the given inode is the root of the filesystem. */
422 #ifndef AFS_SGI_XFS_IOPS_ENV
424 IsRootInode(struct afs_stat
*status
)
427 * The root inode is not a fixed value in XFS partitions. So we need to
428 * see if the partition is in the list of mounted partitions. This only
429 * affects the SalvageFileSys path, so we check there.
431 return (status
->st_ino
== ROOTINODE
);
436 #ifndef AFS_NAMEI_ENV
437 /* We don't want to salvage big files filesystems, since we can't put volumes on
441 CheckIfBigFilesFS(char *mountPoint
, char *devName
)
443 struct superblock fs
;
446 if (strncmp(devName
, "/dev/", 5)) {
447 (void)sprintf(name
, "/dev/%s", devName
);
449 (void)strcpy(name
, devName
);
452 if (ReadSuper(&fs
, name
) < 0) {
453 Log("Unable to read superblock. Not salvaging partition %s.\n",
457 if (IsBigFilesFileSystem(&fs
)) {
458 Log("Partition %s is a big files filesystem, not salvaging.\n",
468 #define HDSTR "\\Device\\Harddisk"
469 #define HDLEN (sizeof(HDSTR)-1) /* Length of "\Device\Harddisk" */
471 SameDisk(struct DiskPartition64
*p1
, struct DiskPartition64
*p2
)
477 static int dowarn
= 1;
479 if (!QueryDosDevice(p1
->devName
, res1
, RES_LEN
- 1))
481 if (strncmp(res1
, HDSTR
, HDLEN
)) {
484 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
485 res1
, HDSTR
, p1
->devName
);
488 if (!QueryDosDevice(p2
->devName
, res2
, RES_LEN
- 1))
490 if (strncmp(res2
, HDSTR
, HDLEN
)) {
493 Log("WARNING: QueryDosDevice is returning %s, not %s for %s\n",
494 res2
, HDSTR
, p2
->devName
);
498 return (0 == _strnicmp(res1
, res2
, RES_LEN
- 1));
501 #define SameDisk(P1, P2) ((P1)->device/PartsPerDisk == (P2)->device/PartsPerDisk)
504 /* This assumes that two partitions with the same device number divided by
505 * PartsPerDisk are on the same disk.
508 SalvageFileSysParallel(struct DiskPartition64
*partP
)
511 struct DiskPartition64
*partP
;
512 int pid
; /* Pid for this job */
513 int jobnumb
; /* Log file job number */
514 struct job
*nextjob
; /* Next partition on disk to salvage */
516 static struct job
*jobs
[MAXPARALLEL
] = { 0 }; /* Need to zero this */
517 struct job
*thisjob
= 0;
518 static int numjobs
= 0;
519 static int jobcount
= 0;
525 char logFileName
[256];
529 /* We have a partition to salvage. Copy it into thisjob */
530 thisjob
= (struct job
*)malloc(sizeof(struct job
));
532 Log("Can't salvage '%s'. Not enough memory\n", partP
->name
);
535 memset(thisjob
, 0, sizeof(struct job
));
536 thisjob
->partP
= partP
;
537 thisjob
->jobnumb
= jobcount
;
539 } else if (jobcount
== 0) {
540 /* We are asking to wait for all jobs (partp == 0), yet we never
543 Log("No file system partitions named %s* found; not salvaged\n",
544 VICE_PARTITION_PREFIX
);
548 if (debug
|| Parallel
== 1) {
550 SalvageFileSys(thisjob
->partP
, 0);
557 /* Check to see if thisjob is for a disk that we are already
558 * salvaging. If it is, link it in as the next job to do. The
559 * jobs array has 1 entry per disk being salvaged. numjobs is
560 * the total number of disks currently being salvaged. In
561 * order to keep the jobs array compact, when a disk is
562 * completed, the highest element in the jobs array is moved
563 * down to the now open slot.
565 for (j
= 0; j
< numjobs
; j
++) {
566 if (SameDisk(jobs
[j
]->partP
, thisjob
->partP
)) {
567 /* On same disk, add it to this list and return */
568 thisjob
->nextjob
= jobs
[j
]->nextjob
;
569 jobs
[j
]->nextjob
= thisjob
;
576 /* Loop until we start thisjob or until all existing jobs are finished */
577 while (thisjob
|| (!partP
&& (numjobs
> 0))) {
578 startjob
= -1; /* No new job to start */
580 if ((numjobs
>= Parallel
) || (!partP
&& (numjobs
> 0))) {
581 /* Either the max jobs are running or we have to wait for all
582 * the jobs to finish. In either case, we wait for at least one
583 * job to finish. When it's done, clean up after it.
585 pid
= wait(&wstatus
);
586 osi_Assert(pid
!= -1);
587 for (j
= 0; j
< numjobs
; j
++) { /* Find which job it is */
588 if (pid
== jobs
[j
]->pid
)
591 osi_Assert(j
< numjobs
);
592 if (WCOREDUMP(wstatus
)) { /* Say if the job core dumped */
593 Log("Salvage of %s core dumped!\n", jobs
[j
]->partP
->name
);
596 numjobs
--; /* job no longer running */
597 oldjob
= jobs
[j
]; /* remember */
598 jobs
[j
] = jobs
[j
]->nextjob
; /* Step to next part on same disk */
599 free(oldjob
); /* free the old job */
601 /* If there is another partition on the disk to salvage, then
602 * say we will start it (startjob). If not, then put thisjob there
603 * and say we will start it.
605 if (jobs
[j
]) { /* Another partitions to salvage */
606 startjob
= j
; /* Will start it */
607 } else { /* There is not another partition to salvage */
609 jobs
[j
] = thisjob
; /* Add thisjob */
611 startjob
= j
; /* Will start it */
613 jobs
[j
] = jobs
[numjobs
]; /* Move last job up to this slot */
614 startjob
= -1; /* Don't start it - already running */
618 /* We don't have to wait for a job to complete */
620 jobs
[numjobs
] = thisjob
; /* Add this job */
622 startjob
= numjobs
; /* Will start it */
626 /* Start up a new salvage job on a partition in job slot "startjob" */
627 if (startjob
!= -1) {
629 Log("Starting salvage of file system partition %s\n",
630 jobs
[startjob
]->partP
->name
);
632 /* For NT, we not only fork, but re-exec the salvager. Pass in the
633 * commands and pass the child job number via the data path.
636 nt_SalvagePartition(jobs
[startjob
]->partP
->name
,
637 jobs
[startjob
]->jobnumb
);
638 jobs
[startjob
]->pid
= pid
;
643 jobs
[startjob
]->pid
= pid
;
649 for (fd
= 0; fd
< 16; fd
++)
656 openlog("salvager", LOG_PID
, useSyslogFacility
);
660 (void)afs_snprintf(logFileName
, sizeof logFileName
,
662 AFSDIR_SERVER_SLVGLOG_FILEPATH
,
663 jobs
[startjob
]->jobnumb
);
664 logFile
= afs_fopen(logFileName
, "w");
669 SalvageFileSys1(jobs
[startjob
]->partP
, 0);
674 } /* while ( thisjob || (!partP && numjobs > 0) ) */
676 /* If waited for all jobs to complete, now collect log files and return */
678 if (!useSyslog
) /* if syslogging - no need to collect */
681 for (i
= 0; i
< jobcount
; i
++) {
682 (void)afs_snprintf(logFileName
, sizeof logFileName
, "%s.%d",
683 AFSDIR_SERVER_SLVGLOG_FILEPATH
, i
);
684 if ((passLog
= afs_fopen(logFileName
, "r"))) {
685 while (fgets(buf
, sizeof(buf
), passLog
)) {
690 (void)unlink(logFileName
);
699 SalvageFileSys(struct DiskPartition64
*partP
, VolumeId singleVolumeNumber
)
701 if (!canfork
|| debug
|| Fork() == 0) {
702 SalvageFileSys1(partP
, singleVolumeNumber
);
703 if (canfork
&& !debug
) {
708 Wait("SalvageFileSys");
712 get_DevName(char *pbuffer
, char *wpath
)
714 char pbuf
[128], *ptr
;
715 strcpy(pbuf
, pbuffer
);
716 ptr
= (char *)strrchr(pbuf
, OS_DIRSEPC
);
722 ptr
= (char *)strrchr(pbuffer
, OS_DIRSEPC
);
724 strcpy(pbuffer
, ptr
+ 1);
731 SalvageFileSys1(struct DiskPartition64
*partP
, VolumeId singleVolumeNumber
)
734 char inodeListPath
[256];
735 FILE *inodeFile
= NULL
;
736 static char tmpDevName
[100];
737 static char wpath
[100];
738 struct VolumeSummary
*vsp
, *esp
;
742 struct SalvInfo l_salvinfo
;
743 struct SalvInfo
*salvinfo
= &l_salvinfo
;
746 memset(salvinfo
, 0, sizeof(*salvinfo
));
753 if (tries
> VOL_MAX_CHECKOUT_RETRIES
) {
754 Abort("Raced too many times with fileserver restarts while trying to "
755 "checkout/lock volumes; Aborted\n");
757 #ifdef AFS_DEMAND_ATTACH_FS
759 /* unlock all previous volume locks, since we're about to lock them
761 VLockFileReinit(&partP
->volLockFile
);
763 #endif /* AFS_DEMAND_ATTACH_FS */
765 salvinfo
->fileSysPartition
= partP
;
766 salvinfo
->fileSysDevice
= salvinfo
->fileSysPartition
->device
;
767 salvinfo
->fileSysPathName
= VPartitionPath(salvinfo
->fileSysPartition
);
770 /* Opendir can fail on "C:" but not on "C:\" if C is empty! */
771 (void)sprintf(salvinfo
->fileSysPath
, "%s" OS_DIRSEP
, salvinfo
->fileSysPathName
);
772 name
= partP
->devName
;
774 strlcpy(salvinfo
->fileSysPath
, salvinfo
->fileSysPathName
, sizeof(salvinfo
->fileSysPath
));
775 strcpy(tmpDevName
, partP
->devName
);
776 name
= get_DevName(tmpDevName
, wpath
);
777 salvinfo
->fileSysDeviceName
= name
;
778 salvinfo
->filesysfulldev
= wpath
;
781 if (singleVolumeNumber
) {
782 #ifndef AFS_DEMAND_ATTACH_FS
783 /* only non-DAFS locks the partition when salvaging a single volume;
784 * DAFS will lock the individual volumes in the VG */
785 VLockPartition(partP
->name
);
786 #endif /* !AFS_DEMAND_ATTACH_FS */
790 /* salvageserver already setup fssync conn for us */
791 if ((programType
!= salvageServer
) && !VConnectFS()) {
792 Abort("Couldn't connect to file server\n");
795 salvinfo
->useFSYNC
= 1;
796 AskOffline(salvinfo
, singleVolumeNumber
);
797 #ifdef AFS_DEMAND_ATTACH_FS
798 if (LockVolume(salvinfo
, singleVolumeNumber
)) {
801 #endif /* AFS_DEMAND_ATTACH_FS */
804 salvinfo
->useFSYNC
= 0;
805 VLockPartition(partP
->name
);
809 ForceSalvage
= UseTheForceLuke(salvinfo
->fileSysPath
);
812 Log("SALVAGING FILE SYSTEM PARTITION %s (device=%s%s)\n",
813 partP
->name
, name
, (Testing
? "(READONLY mode)" : ""));
815 Log("***Forced salvage of all volumes on this partition***\n");
820 * Remove any leftover /vicepa/salvage.inodes.* or /vicepa/salvage.temp.*
827 osi_Assert((dirp
= opendir(salvinfo
->fileSysPath
)) != NULL
);
828 while ((dp
= readdir(dirp
))) {
829 if (!strncmp(dp
->d_name
, "salvage.inodes.", 15)
830 || !strncmp(dp
->d_name
, "salvage.temp.", 13)) {
832 Log("Removing old salvager temp files %s\n", dp
->d_name
);
833 strcpy(npath
, salvinfo
->fileSysPath
);
834 strcat(npath
, OS_DIRSEP
);
835 strcat(npath
, dp
->d_name
);
841 tdir
= (tmpdir
? tmpdir
: salvinfo
->fileSysPath
);
843 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
844 (void)strncpy(inodeListPath
, _tempnam(tdir
, "salvage.inodes."), 255);
846 snprintf(inodeListPath
, 255, "%s" OS_DIRSEP
"salvage.inodes.%s.%d", tdir
, name
,
850 inodeFile
= fopen(inodeListPath
, "w+b");
852 Abort("Error %d when creating inode description file %s; not salvaged\n", errno
, inodeListPath
);
855 /* Using nt_unlink here since we're really using the delete on close
856 * semantics of unlink. In most places in the salvager, we really do
857 * mean to unlink the file at that point. Those places have been
858 * modified to actually do that so that the NT crt can be used there.
860 * jaltman - On NT delete on close cannot be applied to a file while the
861 * process has an open file handle that does not have DELETE file
862 * access and FILE_SHARE_DELETE. fopen() calls CreateFile() without
863 * delete privileges. As a result the nt_unlink() call will always
866 code
= nt_unlink(inodeListPath
);
868 code
= unlink(inodeListPath
);
871 Log("Error %d when trying to unlink %s\n", errno
, inodeListPath
);
874 if (GetInodeSummary(salvinfo
, inodeFile
, singleVolumeNumber
) < 0) {
876 if (singleVolumeNumber
) {
877 /* the volume group -- let alone the volume -- does not exist,
878 * but we checked it out, so give it back to the fileserver */
879 AskDelete(salvinfo
, singleVolumeNumber
);
883 salvinfo
->inodeFd
= fileno(inodeFile
);
884 if (salvinfo
->inodeFd
== -1)
885 Abort("Temporary file %s is missing...\n", inodeListPath
);
886 afs_lseek(salvinfo
->inodeFd
, 0L, SEEK_SET
);
887 if (ListInodeOption
) {
888 PrintInodeList(salvinfo
);
889 if (singleVolumeNumber
) {
890 /* We've checked out the volume from the fileserver, and we need
891 * to give it back. We don't know if the volume exists or not,
892 * so we don't know whether to AskOnline or not. Try to determine
893 * if the volume exists by trying to read the volume header, and
894 * AskOnline if it is readable. */
895 MaybeAskOnline(salvinfo
, singleVolumeNumber
);
899 /* enumerate volumes in the partition.
900 * figure out sets of read-only + rw volumes.
901 * salvage each set, read-only volumes first, then read-write.
902 * Fix up inodes on last volume in set (whether it is read-write
905 if (GetVolumeSummary(salvinfo
, singleVolumeNumber
)) {
909 if (singleVolumeNumber
) {
910 /* If we delete a volume during the salvage, we indicate as such by
911 * setting the volsummary->deleted field. We need to know if we
912 * deleted a volume or not in order to know which volumes to bring
913 * back online after the salvage. If we fork, we will lose this
914 * information, since volsummary->deleted will not get set in the
915 * parent. So, don't fork. */
919 for (i
= j
= 0, vsp
= salvinfo
->volumeSummaryp
, esp
= vsp
+ salvinfo
->nVolumes
;
920 i
< salvinfo
->nVolumesInInodeFile
; i
= j
) {
921 VolumeId rwvid
= salvinfo
->inodeSummary
[i
].RWvolumeId
;
923 j
< salvinfo
->nVolumesInInodeFile
&& salvinfo
->inodeSummary
[j
].RWvolumeId
== rwvid
;
925 VolumeId vid
= salvinfo
->inodeSummary
[j
].volumeId
;
926 struct VolumeSummary
*tsp
;
927 /* Scan volume list (from partition root directory) looking for the
928 * current rw volume number in the volume list from the inode scan.
929 * If there is one here that is not in the inode volume list,
931 for (; vsp
< esp
&& (vsp
->header
.parent
< rwvid
); vsp
++) {
933 DeleteExtraVolumeHeaderFile(salvinfo
, vsp
);
935 /* Now match up the volume summary info from the root directory with the
936 * entry in the volume list obtained from scanning inodes */
937 salvinfo
->inodeSummary
[j
].volSummary
= NULL
;
938 for (tsp
= vsp
; tsp
< esp
&& (tsp
->header
.parent
== rwvid
); tsp
++) {
939 if (tsp
->header
.id
== vid
) {
940 salvinfo
->inodeSummary
[j
].volSummary
= tsp
;
946 /* Salvage the group of volumes (several read-only + 1 read/write)
947 * starting with the current read-only volume we're looking at.
949 SalvageVolumeGroup(salvinfo
, &salvinfo
->inodeSummary
[i
], j
- i
);
952 /* Delete any additional volumes that were listed in the partition but which didn't have any corresponding inodes */
953 for (; vsp
< esp
; vsp
++) {
955 DeleteExtraVolumeHeaderFile(salvinfo
, vsp
);
958 if (!singleVolumeNumber
) /* Remove the FORCESALVAGE file */
959 RemoveTheForce(salvinfo
->fileSysPath
);
961 if (!Testing
&& singleVolumeNumber
) {
963 #ifdef AFS_DEMAND_ATTACH_FS
964 /* unlock vol headers so the fs can attach them when we AskOnline */
965 VLockFileReinit(&salvinfo
->fileSysPartition
->volLockFile
);
966 #endif /* AFS_DEMAND_ATTACH_FS */
968 /* Step through the volumeSummary list and set all volumes on-line.
969 * Most volumes were taken off-line in GetVolumeSummary.
970 * If a volume was deleted, don't tell the fileserver anything, since
971 * we already told the fileserver the volume was deleted back when we
972 * destroyed the volume header.
973 * Also, make sure we bring the singleVolumeNumber back online first.
976 for (j
= 0; j
< salvinfo
->nVolumes
; j
++) {
977 if (salvinfo
->volumeSummaryp
[j
].header
.id
== singleVolumeNumber
) {
979 if (!salvinfo
->volumeSummaryp
[j
].deleted
) {
980 AskOnline(salvinfo
, singleVolumeNumber
);
986 /* If singleVolumeNumber is not in our volumeSummary, it means that
987 * at least one other volume in the VG is on the partition, but the
988 * RW volume is not. We've already AskOffline'd it by now, though,
989 * so make sure we don't still have the volume checked out. */
990 AskDelete(salvinfo
, singleVolumeNumber
);
993 for (j
= 0; j
< salvinfo
->nVolumes
; j
++) {
994 if (salvinfo
->volumeSummaryp
[j
].header
.id
!= singleVolumeNumber
) {
995 if (!salvinfo
->volumeSummaryp
[j
].deleted
) {
996 AskOnline(salvinfo
, salvinfo
->volumeSummaryp
[j
].header
.id
);
1002 Log("SALVAGING OF PARTITION %s%s COMPLETED\n",
1003 salvinfo
->fileSysPartition
->name
, (Testing
? " (READONLY mode)" : ""));
1006 fclose(inodeFile
); /* SalvageVolumeGroup was the last which needed it. */
1010 DeleteExtraVolumeHeaderFile(struct SalvInfo
*salvinfo
, struct VolumeSummary
*vsp
)
1013 char filename
[VMAXPATHLEN
];
1019 VolumeExternalName_r(vsp
->header
.id
, filename
, sizeof(filename
));
1020 sprintf(path
, "%s" OS_DIRSEP
"%s", salvinfo
->fileSysPath
, filename
);
1023 Log("The volume header file %s is not associated with any actual data (%sdeleted)\n", path
, (Testing
? "would have been " : ""));
1026 code
= VDestroyVolumeDiskHeader(salvinfo
->fileSysPartition
, vsp
->header
.id
, vsp
->header
.parent
);
1028 Log("Error %ld destroying volume disk header for volume %lu\n",
1029 afs_printable_int32_ld(code
),
1030 afs_printable_uint32_lu(vsp
->header
.id
));
1033 /* make sure we actually delete the header file; ENOENT
1034 * is fine, since VDestroyVolumeDiskHeader probably already
1036 if (unlink(path
) && errno
!= ENOENT
) {
1037 Log("Unable to unlink %s (errno = %d)\n", path
, errno
);
1039 if (salvinfo
->useFSYNC
) {
1040 AskDelete(salvinfo
, vsp
->header
.id
);
1047 CompareInodes(const void *_p1
, const void *_p2
)
1049 const struct ViceInodeInfo
*p1
= _p1
;
1050 const struct ViceInodeInfo
*p2
= _p2
;
1051 if (p1
->u
.vnode
.vnodeNumber
== INODESPECIAL
1052 || p2
->u
.vnode
.vnodeNumber
== INODESPECIAL
) {
1053 VolumeId p1rwid
, p2rwid
;
1055 (p1
->u
.vnode
.vnodeNumber
==
1056 INODESPECIAL
? p1
->u
.special
.parentId
: p1
->u
.vnode
.volumeId
);
1058 (p2
->u
.vnode
.vnodeNumber
==
1059 INODESPECIAL
? p2
->u
.special
.parentId
: p2
->u
.vnode
.volumeId
);
1060 if (p1rwid
< p2rwid
)
1062 if (p1rwid
> p2rwid
)
1064 if (p1
->u
.vnode
.vnodeNumber
== INODESPECIAL
1065 && p2
->u
.vnode
.vnodeNumber
== INODESPECIAL
) {
1066 if (p1
->u
.vnode
.volumeId
== p2
->u
.vnode
.volumeId
)
1067 return (p1
->u
.special
.type
< p2
->u
.special
.type
? -1 : 1);
1068 if (p1
->u
.vnode
.volumeId
== p1rwid
)
1070 if (p2
->u
.vnode
.volumeId
== p2rwid
)
1072 return (p1
->u
.vnode
.volumeId
< p2
->u
.vnode
.volumeId
? -1 : 1);
1074 if (p1
->u
.vnode
.vnodeNumber
!= INODESPECIAL
)
1075 return (p2
->u
.vnode
.volumeId
== p2rwid
? 1 : -1);
1076 return (p1
->u
.vnode
.volumeId
== p1rwid
? -1 : 1);
1078 if (p1
->u
.vnode
.volumeId
< p2
->u
.vnode
.volumeId
)
1080 if (p1
->u
.vnode
.volumeId
> p2
->u
.vnode
.volumeId
)
1082 if (p1
->u
.vnode
.vnodeNumber
< p2
->u
.vnode
.vnodeNumber
)
1084 if (p1
->u
.vnode
.vnodeNumber
> p2
->u
.vnode
.vnodeNumber
)
1086 /* The following tests are reversed, so that the most desirable
1087 * of several similar inodes comes first */
1088 if (p1
->u
.vnode
.vnodeUniquifier
> p2
->u
.vnode
.vnodeUniquifier
) {
1089 #ifdef AFS_3DISPARES
1090 if (p1
->u
.vnode
.vnodeUniquifier
> 3775414 /* 90% of 4.2M */ &&
1091 p2
->u
.vnode
.vnodeUniquifier
< 419490 /* 10% of 4.2M */ )
1094 #ifdef AFS_SGI_EXMAG
1095 if (p1
->u
.vnode
.vnodeUniquifier
> 15099494 /* 90% of 16M */ &&
1096 p2
->u
.vnode
.vnodeUniquifier
< 1677721 /* 10% of 16M */ )
1101 if (p1
->u
.vnode
.vnodeUniquifier
< p2
->u
.vnode
.vnodeUniquifier
) {
1102 #ifdef AFS_3DISPARES
1103 if (p2
->u
.vnode
.vnodeUniquifier
> 3775414 /* 90% of 4.2M */ &&
1104 p1
->u
.vnode
.vnodeUniquifier
< 419490 /* 10% of 4.2M */ )
1107 #ifdef AFS_SGI_EXMAG
1108 if (p2
->u
.vnode
.vnodeUniquifier
> 15099494 /* 90% of 16M */ &&
1109 p1
->u
.vnode
.vnodeUniquifier
< 1677721 /* 10% of 16M */ )
1114 if (p1
->u
.vnode
.inodeDataVersion
> p2
->u
.vnode
.inodeDataVersion
) {
1115 #ifdef AFS_3DISPARES
1116 if (p1
->u
.vnode
.inodeDataVersion
> 1887437 /* 90% of 2.1M */ &&
1117 p2
->u
.vnode
.inodeDataVersion
< 209716 /* 10% of 2.1M */ )
1120 #ifdef AFS_SGI_EXMAG
1121 if (p1
->u
.vnode
.inodeDataVersion
> 15099494 /* 90% of 16M */ &&
1122 p2
->u
.vnode
.inodeDataVersion
< 1677721 /* 10% of 16M */ )
1127 if (p1
->u
.vnode
.inodeDataVersion
< p2
->u
.vnode
.inodeDataVersion
) {
1128 #ifdef AFS_3DISPARES
1129 if (p2
->u
.vnode
.inodeDataVersion
> 1887437 /* 90% of 2.1M */ &&
1130 p1
->u
.vnode
.inodeDataVersion
< 209716 /* 10% of 2.1M */ )
1133 #ifdef AFS_SGI_EXMAG
1134 if (p2
->u
.vnode
.inodeDataVersion
> 15099494 /* 90% of 16M */ &&
1135 p1
->u
.vnode
.inodeDataVersion
< 1677721 /* 10% of 16M */ )
1144 CountVolumeInodes(struct ViceInodeInfo
*ip
, int maxInodes
,
1145 struct InodeSummary
*summary
)
1147 VolumeId volume
= ip
->u
.vnode
.volumeId
;
1148 VolumeId rwvolume
= volume
;
1153 while (maxInodes
-- && volume
== ip
->u
.vnode
.volumeId
) {
1155 if (ip
->u
.vnode
.vnodeNumber
== INODESPECIAL
) {
1157 rwvolume
= ip
->u
.special
.parentId
;
1158 /* This isn't quite right, as there could (in error) be different
1159 * parent inodes in different special vnodes */
1161 if (maxunique
< ip
->u
.vnode
.vnodeUniquifier
)
1162 maxunique
= ip
->u
.vnode
.vnodeUniquifier
;
1166 summary
->volumeId
= volume
;
1167 summary
->RWvolumeId
= rwvolume
;
1168 summary
->nInodes
= n
;
1169 summary
->nSpecialInodes
= nSpecial
;
1170 summary
->maxUniquifier
= maxunique
;
1174 OnlyOneVolume(struct ViceInodeInfo
*inodeinfo
, afs_uint32 singleVolumeNumber
, void *rock
)
1176 if (inodeinfo
->u
.vnode
.vnodeNumber
== INODESPECIAL
)
1177 return (inodeinfo
->u
.special
.parentId
== singleVolumeNumber
);
1178 return (inodeinfo
->u
.vnode
.volumeId
== singleVolumeNumber
);
1183 * Collect list of inodes in file named by path. If a truly fatal error,
1184 * unlink the file and abort. For lesser errors, return -1. The file will
1185 * be unlinked by the caller.
1188 GetInodeSummary(struct SalvInfo
*salvinfo
, FILE *inodeFile
, VolumeId singleVolumeNumber
)
1190 struct afs_stat status
;
1193 struct ViceInodeInfo
*ip
, *ip_save
;
1194 struct InodeSummary summary
;
1195 char summaryFileName
[50];
1198 char *dev
= salvinfo
->fileSysPath
;
1199 char *wpath
= salvinfo
->fileSysPath
;
1201 char *dev
= salvinfo
->fileSysDeviceName
;
1202 char *wpath
= salvinfo
->filesysfulldev
;
1204 char *part
= salvinfo
->fileSysPath
;
1210 /* This file used to come from vfsck; cobble it up ourselves now... */
1212 ListViceInodes(dev
, salvinfo
->fileSysPath
, inodeFile
,
1213 singleVolumeNumber
? OnlyOneVolume
: 0,
1214 singleVolumeNumber
, &forceSal
, forceR
, wpath
, NULL
)) < 0) {
1216 Log("*** I/O error %d when writing a tmp inode file; Not salvaged %s ***\nIncrease space on partition or use '-tmpdir'\n", errno
, dev
);
1220 Abort("Unable to get inodes for \"%s\"; not salvaged\n", dev
);
1222 if (forceSal
&& !ForceSalvage
) {
1223 Log("***Forced salvage of all volumes on this partition***\n");
1226 fseek(inodeFile
, 0L, SEEK_SET
);
1227 salvinfo
->inodeFd
= fileno(inodeFile
);
1228 if (salvinfo
->inodeFd
== -1 || afs_fstat(salvinfo
->inodeFd
, &status
) == -1) {
1229 Abort("No inode description file for \"%s\"; not salvaged\n", dev
);
1231 tdir
= (tmpdir
? tmpdir
: part
);
1233 (void)_putenv("TMP="); /* If "TMP" is set, then that overrides tdir. */
1234 (void)strcpy(summaryFileName
, _tempnam(tdir
, "salvage.temp."));
1236 (void)afs_snprintf(summaryFileName
, sizeof summaryFileName
,
1237 "%s" OS_DIRSEP
"salvage.temp.%d", tdir
, getpid());
1239 summaryFile
= afs_fopen(summaryFileName
, "a+");
1240 if (summaryFile
== NULL
) {
1241 Abort("Unable to create inode summary file\n");
1245 /* Using nt_unlink here since we're really using the delete on close
1246 * semantics of unlink. In most places in the salvager, we really do
1247 * mean to unlink the file at that point. Those places have been
1248 * modified to actually do that so that the NT crt can be used there.
1250 * jaltman - As commented elsewhere, this cannot work because fopen()
1251 * does not open files with DELETE and FILE_SHARE_DELETE.
1253 code
= nt_unlink(summaryFileName
);
1255 code
= unlink(summaryFileName
);
1258 Log("Error %d when trying to unlink %s\n", errno
, summaryFileName
);
1261 if (!canfork
|| debug
|| Fork() == 0) {
1263 unsigned long st_size
=(unsigned long) status
.st_size
;
1264 nInodes
= st_size
/ sizeof(struct ViceInodeInfo
);
1266 fclose(summaryFile
);
1267 if (!singleVolumeNumber
) /* Remove the FORCESALVAGE file */
1268 RemoveTheForce(salvinfo
->fileSysPath
);
1270 struct VolumeSummary
*vsp
;
1274 GetVolumeSummary(salvinfo
, singleVolumeNumber
);
1276 for (i
= 0, vsp
= salvinfo
->volumeSummaryp
; i
< salvinfo
->nVolumes
; i
++) {
1278 if (vsp
->header
.id
== singleVolumeNumber
) {
1281 DeleteExtraVolumeHeaderFile(salvinfo
, vsp
);
1287 MaybeAskOnline(salvinfo
, singleVolumeNumber
);
1289 /* make sure we get rid of stray .vol headers, even if
1290 * they're not in our volume summary (might happen if
1291 * e.g. something else created them and they're not in the
1292 * fileserver VGC) */
1293 VDestroyVolumeDiskHeader(salvinfo
->fileSysPartition
,
1294 singleVolumeNumber
, 0 /*parent*/);
1295 AskDelete(salvinfo
, singleVolumeNumber
);
1299 Log("%s vice inodes on %s; not salvaged\n",
1300 singleVolumeNumber
? "No applicable" : "No", dev
);
1305 ip
= (struct ViceInodeInfo
*)malloc(nInodes
*sizeof(struct ViceInodeInfo
));
1307 fclose(summaryFile
);
1309 ("Unable to allocate enough space to read inode table; %s not salvaged\n",
1312 if (read(salvinfo
->inodeFd
, ip
, st_size
) != st_size
) {
1313 fclose(summaryFile
);
1314 Abort("Unable to read inode table; %s not salvaged\n", dev
);
1316 qsort(ip
, nInodes
, sizeof(struct ViceInodeInfo
), CompareInodes
);
1317 if (afs_lseek(salvinfo
->inodeFd
, 0, SEEK_SET
) == -1
1318 || write(salvinfo
->inodeFd
, ip
, st_size
) != st_size
) {
1319 fclose(summaryFile
);
1320 Abort("Unable to rewrite inode table; %s not salvaged\n", dev
);
1325 CountVolumeInodes(ip
, nInodes
, &summary
);
1326 if (fwrite(&summary
, sizeof(summary
), 1, summaryFile
) != 1) {
1327 Log("Difficulty writing summary file (errno = %d); %s not salvaged\n", errno
, dev
);
1328 fclose(summaryFile
);
1332 summary
.index
+= (summary
.nInodes
);
1333 nInodes
-= summary
.nInodes
;
1334 ip
+= summary
.nInodes
;
1337 ip
= ip_save
= NULL
;
1338 /* Following fflush is not fclose, because if it was debug mode would not work */
1339 if (fflush(summaryFile
) == EOF
|| fsync(fileno(summaryFile
)) == -1) {
1340 Log("Unable to write summary file (errno = %d); %s not salvaged\n", errno
, dev
);
1341 fclose(summaryFile
);
1345 if (canfork
&& !debug
) {
1350 if (Wait("Inode summary") == -1) {
1351 fclose(summaryFile
);
1352 Exit(1); /* salvage of this partition aborted */
1355 osi_Assert(afs_fstat(fileno(summaryFile
), &status
) != -1);
1356 if (status
.st_size
!= 0) {
1358 unsigned long st_status
=(unsigned long)status
.st_size
;
1359 salvinfo
->inodeSummary
= (struct InodeSummary
*)malloc(st_status
);
1360 osi_Assert(salvinfo
->inodeSummary
!= NULL
);
1361 /* For GNU we need to do lseek to get the file pointer moved. */
1362 osi_Assert(afs_lseek(fileno(summaryFile
), 0, SEEK_SET
) == 0);
1363 ret
= read(fileno(summaryFile
), salvinfo
->inodeSummary
, st_status
);
1364 osi_Assert(ret
== st_status
);
1366 salvinfo
->nVolumesInInodeFile
=(unsigned long)(status
.st_size
) / sizeof(struct InodeSummary
);
1367 for (i
= 0; i
< salvinfo
->nVolumesInInodeFile
; i
++) {
1368 salvinfo
->inodeSummary
[i
].volSummary
= NULL
;
1370 Log("%d nVolumesInInodeFile %lu \n",salvinfo
->nVolumesInInodeFile
,(unsigned long)(status
.st_size
));
1371 fclose(summaryFile
);
1374 if (retcode
&& singleVolumeNumber
&& !deleted
) {
1375 AskError(salvinfo
, singleVolumeNumber
);
1381 /* Comparison routine for volume sort.
1382 This is setup so that a read-write volume comes immediately before
1383 any read-only clones of that volume */
1385 CompareVolumes(const void *_p1
, const void *_p2
)
1387 const struct VolumeSummary
*p1
= _p1
;
1388 const struct VolumeSummary
*p2
= _p2
;
1389 if (p1
->header
.parent
!= p2
->header
.parent
)
1390 return p1
->header
.parent
< p2
->header
.parent
? -1 : 1;
1391 if (p1
->header
.id
== p1
->header
.parent
) /* p1 is rw volume */
1393 if (p2
->header
.id
== p2
->header
.parent
) /* p2 is rw volume */
1395 return p1
->header
.id
< p2
->header
.id
? -1 : 1; /* Both read-only */
1399 * Gleans volumeSummary information by asking the fileserver
1401 * @param[in] singleVolumeNumber the volume we're salvaging. 0 if we're
1402 * salvaging a whole partition
1404 * @return whether we obtained the volume summary information or not
1405 * @retval 0 success; we obtained the volume summary information
1406 * @retval -1 we raced with a fileserver restart; volume locks and checkout
1408 * @retval 1 we did not get the volume summary information; either the
1409 * fileserver responded with an error, or we are not supposed to
1410 * ask the fileserver for the information (e.g. we are salvaging
1411 * the entire partition or we are not the salvageserver)
1413 * @note for non-DAFS, always returns 1
1416 AskVolumeSummary(struct SalvInfo
*salvinfo
, VolumeId singleVolumeNumber
)
1419 #if defined(FSSYNC_BUILD_CLIENT) && defined(AFS_DEMAND_ATTACH_FS)
1420 if (programType
== salvageServer
) {
1421 if (singleVolumeNumber
) {
1422 FSSYNC_VGQry_response_t q_res
;
1424 struct VolumeSummary
*vsp
;
1426 struct VolumeDiskHeader diskHdr
;
1428 memset(&res
, 0, sizeof(res
));
1430 code
= FSYNC_VGCQuery(salvinfo
->fileSysPartition
->name
, singleVolumeNumber
, &q_res
, &res
);
1433 * We must wait for the partition to finish scanning before
1434 * we can continue, since we will not know if we got the entire
1435 * VG membership unless the partition is fully scanned.
1436 * We could, in theory, just scan the partition ourselves if
1437 * the VG cache is not ready, but we would be doing the exact
1438 * same scan the fileserver is doing; it will almost always
1439 * be faster to wait for the fileserver. The only exceptions
1440 * are if the partition does not take very long to scan, and
1441 * in that case it's fast either way, so who cares?
1443 if (code
== SYNC_FAILED
&& res
.hdr
.reason
== FSYNC_PART_SCANNING
) {
1444 Log("waiting for fileserver to finish scanning partition %s...\n",
1445 salvinfo
->fileSysPartition
->name
);
1447 for (i
= 1; code
== SYNC_FAILED
&& res
.hdr
.reason
== FSYNC_PART_SCANNING
; i
++) {
1448 /* linearly ramp up from 1 to 10 seconds; nothing fancy,
1449 * just so small partitions don't need to wait over 10
1450 * seconds every time, and large partitions are generally
1451 * polled only once every ten seconds. */
1452 sleep((i
> 10) ? (i
= 10) : i
);
1454 code
= FSYNC_VGCQuery(salvinfo
->fileSysPartition
->name
, singleVolumeNumber
, &q_res
, &res
);
1458 if (code
== SYNC_FAILED
&& res
.hdr
.reason
== FSYNC_UNKNOWN_VOLID
) {
1459 /* This can happen if there's no header for the volume
1460 * we're salvaging, or no headers exist for the VG (if
1461 * we're salvaging an RW). Act as if we got a response
1462 * with no VG members. The headers may be created during
1463 * salvaging, if there are inodes in this VG. */
1465 memset(&q_res
, 0, sizeof(q_res
));
1466 q_res
.rw
= singleVolumeNumber
;
1470 Log("fileserver refused VGCQuery request for volume %lu on "
1471 "partition %s, code %ld reason %ld\n",
1472 afs_printable_uint32_lu(singleVolumeNumber
),
1473 salvinfo
->fileSysPartition
->name
,
1474 afs_printable_int32_ld(code
),
1475 afs_printable_int32_ld(res
.hdr
.reason
));
1479 if (q_res
.rw
!= singleVolumeNumber
) {
1480 Log("fileserver requested salvage of clone %lu; scheduling salvage of volume group %lu...\n",
1481 afs_printable_uint32_lu(singleVolumeNumber
),
1482 afs_printable_uint32_lu(q_res
.rw
));
1483 #ifdef SALVSYNC_BUILD_CLIENT
1484 if (SALVSYNC_LinkVolume(q_res
.rw
,
1486 salvinfo
->fileSysPartition
->name
,
1488 Log("schedule request failed\n");
1490 #endif /* SALVSYNC_BUILD_CLIENT */
1491 Exit(SALSRV_EXIT_VOLGROUP_LINK
);
1494 salvinfo
->volumeSummaryp
= calloc(VOL_VG_MAX_VOLS
, sizeof(struct VolumeSummary
));
1495 osi_Assert(salvinfo
->volumeSummaryp
!= NULL
);
1497 salvinfo
->nVolumes
= 0;
1498 vsp
= salvinfo
->volumeSummaryp
;
1500 for (i
= 0; i
< VOL_VG_MAX_VOLS
; i
++) {
1501 char name
[VMAXPATHLEN
];
1503 if (!q_res
.children
[i
]) {
1507 /* AskOffline for singleVolumeNumber was called much earlier */
1508 if (q_res
.children
[i
] != singleVolumeNumber
) {
1509 AskOffline(salvinfo
, q_res
.children
[i
]);
1510 if (LockVolume(salvinfo
, q_res
.children
[i
])) {
1516 code
= VReadVolumeDiskHeader(q_res
.children
[i
], salvinfo
->fileSysPartition
, &diskHdr
);
1518 Log("Cannot read header for %lu; trying to salvage group anyway\n",
1519 afs_printable_uint32_lu(q_res
.children
[i
]));
1524 DiskToVolumeHeader(&vsp
->header
, &diskHdr
);
1525 VolumeExternalName_r(q_res
.children
[i
], name
, sizeof(name
));
1527 salvinfo
->nVolumes
++;
1531 qsort(salvinfo
->volumeSummaryp
, salvinfo
->nVolumes
, sizeof(struct VolumeSummary
),
1536 Log("Cannot get volume summary from fileserver; falling back to scanning "
1537 "entire partition\n");
1540 #endif /* FSSYNC_BUILD_CLIENT && AFS_DEMAND_ATTACH_FS */
1545 * count how many volume headers are found by VWalkVolumeHeaders.
1547 * @param[in] dp the disk partition (unused)
1548 * @param[in] name full path to the .vol header (unused)
1549 * @param[in] hdr the header data (unused)
1550 * @param[in] last whether this is the last try or not (unused)
1551 * @param[in] rock actually an afs_int32*; the running count of how many
1552 * volumes we have found
1557 CountHeader(struct DiskPartition64
*dp
, const char *name
,
1558 struct VolumeDiskHeader
*hdr
, int last
, void *rock
)
1560 afs_int32
*nvols
= (afs_int32
*)rock
;
1566 * parameters to pass to the VWalkVolumeHeaders callbacks when recording volume
1569 struct SalvageScanParams
{
1570 VolumeId singleVolumeNumber
; /**< 0 for a partition-salvage, otherwise the
1571 * vol id of the VG we're salvaging */
1572 struct VolumeSummary
*vsp
; /**< ptr to the current volume summary object
1573 * we're filling in */
1574 afs_int32 nVolumes
; /**< # of vols we've encountered */
1575 afs_int32 totalVolumes
; /**< max # of vols we should encounter (the
1576 * # of vols we've alloc'd memory for) */
1577 int retry
; /**< do we need to retry vol lock/checkout? */
1578 struct SalvInfo
*salvinfo
; /**< salvage job info */
1582 * records volume summary info found from VWalkVolumeHeaders.
1584 * Found volumes are also taken offline if they are in the specific volume
1585 * group we are looking for.
1587 * @param[in] dp the disk partition
1588 * @param[in] name full path to the .vol header
1589 * @param[in] hdr the header data
1590 * @param[in] last 1 if this is the last try to read the header, 0 otherwise
1591 * @param[in] rock actually a struct SalvageScanParams*, containing the
1592 * information needed to record the volume summary data
1594 * @return operation status
1596 * @retval -1 volume locking raced with fileserver restart; checking out
1597 * and locking volumes needs to be retried
1598 * @retval 1 volume header is mis-named and should be deleted
1601 RecordHeader(struct DiskPartition64
*dp
, const char *name
,
1602 struct VolumeDiskHeader
*hdr
, int last
, void *rock
)
1604 char nameShouldBe
[64];
1605 struct SalvageScanParams
*params
;
1606 struct VolumeSummary summary
;
1607 VolumeId singleVolumeNumber
;
1608 struct SalvInfo
*salvinfo
;
1610 params
= (struct SalvageScanParams
*)rock
;
1612 memset(&summary
, 0, sizeof(summary
));
1614 singleVolumeNumber
= params
->singleVolumeNumber
;
1615 salvinfo
= params
->salvinfo
;
1617 DiskToVolumeHeader(&summary
.header
, hdr
);
1619 if (singleVolumeNumber
&& summary
.header
.id
== singleVolumeNumber
1620 && summary
.header
.parent
!= singleVolumeNumber
) {
1622 if (programType
== salvageServer
) {
1623 #ifdef SALVSYNC_BUILD_CLIENT
1624 Log("fileserver requested salvage of clone %u; scheduling salvage of volume group %u...\n",
1625 summary
.header
.id
, summary
.header
.parent
);
1626 if (SALVSYNC_LinkVolume(summary
.header
.parent
,
1630 Log("schedule request failed\n");
1633 Exit(SALSRV_EXIT_VOLGROUP_LINK
);
1636 Log("%u is a read-only volume; not salvaged\n",
1637 singleVolumeNumber
);
1642 if (!singleVolumeNumber
|| summary
.header
.id
== singleVolumeNumber
1643 || summary
.header
.parent
== singleVolumeNumber
) {
1645 /* check if the header file is incorrectly named */
1647 const char *base
= strrchr(name
, OS_DIRSEPC
);
1654 (void)afs_snprintf(nameShouldBe
, sizeof nameShouldBe
,
1655 VFORMAT
, afs_printable_uint32_lu(summary
.header
.id
));
1658 if (strcmp(nameShouldBe
, base
)) {
1659 /* .vol file has wrong name; retry/delete */
1663 if (!badname
|| last
) {
1664 /* only offline the volume if the header is good, or if this is
1665 * the last try looking at it; avoid AskOffline'ing the same vol
1668 if (singleVolumeNumber
1669 && summary
.header
.id
!= singleVolumeNumber
) {
1670 /* don't offline singleVolumeNumber; we already did that
1673 AskOffline(salvinfo
, summary
.header
.id
);
1675 #ifdef AFS_DEMAND_ATTACH_FS
1677 /* don't lock the volume if the header is bad, since we're
1678 * about to delete it anyway. */
1679 if (LockVolume(salvinfo
, summary
.header
.id
)) {
1684 #endif /* AFS_DEMAND_ATTACH_FS */
1688 if (last
&& !Showmode
) {
1689 Log("Volume header file %s is incorrectly named (should be %s "
1690 "not %s); %sdeleted (it will be recreated later, if "
1691 "necessary)\n", name
, nameShouldBe
, base
,
1692 (Testing
? "it would have been " : ""));
1700 if (params
->nVolumes
> params
->totalVolumes
) {
1701 /* We found more volumes than we found on the first partition walk;
1702 * apparently something created a volume while we were
1703 * partition-salvaging, or we found more than 20 vols when salvaging a
1704 * particular volume. Abort if we detect this, since other programs
1705 * are supposed to not touch the partition while it is partition-salvaging,
1706 * and we shouldn't find more than 20 vols in a VG.
1708 Abort("Found %ld vol headers, but should have found at most %ld! "
1709 "Make sure the volserver/fileserver are not running at the "
1710 "same time as a partition salvage\n",
1711 afs_printable_int32_ld(params
->nVolumes
),
1712 afs_printable_int32_ld(params
->totalVolumes
));
1715 memcpy(params
->vsp
, &summary
, sizeof(summary
));
1723 * possibly unlinks bad volume headers found from VWalkVolumeHeaders.
1725 * If the header could not be read in at all, the header is always unlinked.
1726 * If instead RecordHeader said the header was bad (that is, the header file
1727 * is mis-named), we only unlink if we are doing a partition salvage, as
1728 * opposed to salvaging a specific volume group.
1730 * @param[in] dp the disk partition
1731 * @param[in] name full path to the .vol header
1732 * @param[in] hdr header data, or NULL if the header could not be read
1733 * @param[in] rock actually a struct SalvageScanParams*, with some information
1737 UnlinkHeader(struct DiskPartition64
*dp
, const char *name
,
1738 struct VolumeDiskHeader
*hdr
, void *rock
)
1740 struct SalvageScanParams
*params
;
1743 params
= (struct SalvageScanParams
*)rock
;
1746 /* no header; header is too bogus to read in at all */
1748 Log("%s is not a legitimate volume header file; %sdeleted\n", name
, (Testing
? "it would have been " : ""));
1754 } else if (!params
->singleVolumeNumber
) {
1755 /* We were able to read in a header, but RecordHeader said something
1756 * was wrong with it. We only unlink those if we are doing a partition
1763 if (dounlink
&& unlink(name
)) {
1764 Log("Error %d while trying to unlink %s\n", errno
, name
);
1769 * Populates salvinfo->volumeSummaryp with volume summary information, either by asking
1770 * the fileserver for VG information, or by scanning the /vicepX partition.
1772 * @param[in] singleVolumeNumber the volume ID of the single volume group we
1773 * are salvaging, or 0 if this is a partition
1776 * @return operation status
1778 * @retval -1 we raced with a fileserver restart; checking out and locking
1779 * volumes must be retried
1782 GetVolumeSummary(struct SalvInfo
*salvinfo
, VolumeId singleVolumeNumber
)
1784 afs_int32 nvols
= 0;
1785 struct SalvageScanParams params
;
1788 code
= AskVolumeSummary(salvinfo
, singleVolumeNumber
);
1790 /* we successfully got the vol information from the fileserver; no
1791 * need to scan the partition */
1795 /* we need to retry volume checkout */
1799 if (!singleVolumeNumber
) {
1800 /* Count how many volumes we have in /vicepX */
1801 code
= VWalkVolumeHeaders(salvinfo
->fileSysPartition
, salvinfo
->fileSysPath
, CountHeader
,
1804 Abort("Can't read directory %s; not salvaged\n", salvinfo
->fileSysPath
);
1809 nvols
= VOL_VG_MAX_VOLS
;
1812 salvinfo
->volumeSummaryp
= calloc(nvols
, sizeof(struct VolumeSummary
));
1813 osi_Assert(salvinfo
->volumeSummaryp
!= NULL
);
1815 params
.singleVolumeNumber
= singleVolumeNumber
;
1816 params
.vsp
= salvinfo
->volumeSummaryp
;
1817 params
.nVolumes
= 0;
1818 params
.totalVolumes
= nvols
;
1820 params
.salvinfo
= salvinfo
;
1822 /* walk the partition directory of volume headers and record the info
1823 * about them; unlinking invalid headers */
1824 code
= VWalkVolumeHeaders(salvinfo
->fileSysPartition
, salvinfo
->fileSysPath
, RecordHeader
,
1825 UnlinkHeader
, ¶ms
);
1827 /* we apparently need to retry checking-out/locking volumes */
1831 Abort("Failed to get volume header summary\n");
1833 salvinfo
->nVolumes
= params
.nVolumes
;
1835 qsort(salvinfo
->volumeSummaryp
, salvinfo
->nVolumes
, sizeof(struct VolumeSummary
),
1841 /* Find the link table. This should be associated with the RW volume, even
1842 * if there is only an RO volume at this site.
1845 FindLinkHandle(struct InodeSummary
*isp
, int nVols
,
1846 struct ViceInodeInfo
*allInodes
)
1849 struct ViceInodeInfo
*ip
;
1851 for (i
= 0; i
< nVols
; i
++) {
1852 ip
= allInodes
+ isp
[i
].index
;
1853 for (j
= 0; j
< isp
[i
].nSpecialInodes
; j
++) {
1854 if (ip
[j
].u
.special
.volumeId
== isp
->RWvolumeId
&&
1855 ip
[j
].u
.special
.parentId
== isp
->RWvolumeId
&&
1856 ip
[j
].u
.special
.type
== VI_LINKTABLE
) {
1857 return ip
[j
].inodeNumber
;
1864 #ifdef AFS_NAMEI_ENV
1866 CheckDupLinktable(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
, struct ViceInodeInfo
*ip
)
1869 if (ip
->u
.vnode
.vnodeNumber
!= INODESPECIAL
) {
1870 /* not a linktable; process as a normal file */
1873 if (ip
->u
.special
.type
!= VI_LINKTABLE
) {
1874 /* not a linktable; process as a normal file */
1878 /* make sure nothing inc/decs it */
1881 if (ip
->u
.special
.volumeId
== ip
->u
.special
.parentId
) {
1882 /* This is a little weird, but shouldn't break anything, and there is
1883 * no known way that this can happen; just do nothing, in case deleting
1884 * it would screw something up. */
1885 Log("Inode %s appears to be a valid linktable for id (%u), but it's not\n",
1886 PrintInode(stmp
, ip
->inodeNumber
), ip
->u
.special
.parentId
);
1887 Log("the linktable for our volume group (%u). This is unusual, since\n",
1889 Log("there should only be one linktable per volume group. I'm leaving\n");
1890 Log("it alone, just to be safe.\n");
1894 Log("Linktable %s appears to be invalid (parentid/volumeid mismatch: %u != %u)\n",
1895 PrintInode(stmp
, ip
->inodeNumber
), ip
->u
.special
.parentId
, ip
->u
.special
.volumeId
);
1897 Log("Would have deleted linktable inode %s\n", PrintInode(stmp
, ip
->inodeNumber
));
1902 Log("Deleting linktable inode %s\n", PrintInode(stmp
, ip
->inodeNumber
));
1903 IH_INIT(tmpH
, salvinfo
->fileSysDevice
, isp
->RWvolumeId
, ip
->inodeNumber
);
1904 namei_HandleToName(&ufs_name
, tmpH
);
1905 if (unlink(ufs_name
.n_path
) < 0) {
1906 Log("Error %d unlinking path %s\n", errno
, ufs_name
.n_path
);
1915 CreateLinkTable(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
, Inode ino
)
1917 struct versionStamp version
;
1920 if (!VALID_INO(ino
))
1922 IH_CREATE(NULL
, salvinfo
->fileSysDevice
, salvinfo
->fileSysPath
, 0, isp
->RWvolumeId
,
1923 INODESPECIAL
, VI_LINKTABLE
, isp
->RWvolumeId
);
1924 if (!VALID_INO(ino
))
1926 ("Unable to allocate link table inode for volume %u (error = %d)\n",
1927 isp
->RWvolumeId
, errno
);
1928 IH_INIT(salvinfo
->VGLinkH
, salvinfo
->fileSysDevice
, isp
->RWvolumeId
, ino
);
1929 fdP
= IH_OPEN(salvinfo
->VGLinkH
);
1931 Abort("Can't open link table for volume %u (error = %d)\n",
1932 isp
->RWvolumeId
, errno
);
1934 if (FDH_TRUNC(fdP
, sizeof(version
) + sizeof(short)) < 0)
1935 Abort("Can't truncate link table for volume %u (error = %d)\n",
1936 isp
->RWvolumeId
, errno
);
1938 version
.magic
= LINKTABLEMAGIC
;
1939 version
.version
= LINKTABLEVERSION
;
1941 if (FDH_PWRITE(fdP
, (char *)&version
, sizeof(version
), 0)
1943 Abort("Can't truncate link table for volume %u (error = %d)\n",
1944 isp
->RWvolumeId
, errno
);
1946 FDH_REALLYCLOSE(fdP
);
1948 /* If the volume summary exists (i.e., the V*.vol header file exists),
1949 * then set this inode there as well.
1951 if (isp
->volSummary
)
1952 isp
->volSummary
->header
.linkTable
= ino
;
1961 SVGParms_t
*parms
= (SVGParms_t
*) arg
;
1962 DoSalvageVolumeGroup(parms
->svgp_salvinfo
, parms
->svgp_inodeSummaryp
, parms
->svgp_count
);
1967 SalvageVolumeGroup(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
, int nVols
)
1970 pthread_attr_t tattr
;
1974 /* Initialize per volume global variables, even if later code does so */
1975 salvinfo
->VolumeChanged
= 0;
1976 salvinfo
->VGLinkH
= NULL
;
1977 salvinfo
->VGLinkH_cnt
= 0;
1978 memset(&salvinfo
->VolInfo
, 0, sizeof(salvinfo
->VolInfo
));
1980 parms
.svgp_inodeSummaryp
= isp
;
1981 parms
.svgp_count
= nVols
;
1982 parms
.svgp_salvinfo
= salvinfo
;
1983 code
= pthread_attr_init(&tattr
);
1985 Log("Failed to salvage volume group %u: pthread_attr_init()\n",
1989 code
= pthread_attr_setdetachstate(&tattr
, PTHREAD_CREATE_JOINABLE
);
1991 Log("Failed to salvage volume group %u: pthread_attr_setdetachstate()\n", isp
->RWvolumeId
);
1994 code
= pthread_create(&tid
, &tattr
, nt_SVG
, &parms
);
1996 Log("Failed to create thread to salvage volume group %u\n",
2000 (void)pthread_join(tid
, NULL
);
2002 #endif /* AFS_NT40_ENV */
2005 DoSalvageVolumeGroup(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
, int nVols
)
2007 struct ViceInodeInfo
*inodes
, *allInodes
, *ip
;
2008 int i
, totalInodes
, size
, salvageTo
;
2012 int dec_VGLinkH
= 0;
2014 FdHandle_t
*fdP
= NULL
;
2016 salvinfo
->VGLinkH_cnt
= 0;
2017 haveRWvolume
= (isp
->volumeId
== isp
->RWvolumeId
2018 && isp
->nSpecialInodes
> 0);
2019 if ((!ShowMounts
) || (ShowMounts
&& !haveRWvolume
)) {
2020 if (!ForceSalvage
&& QuickCheck(salvinfo
, isp
, nVols
))
2023 if (ShowMounts
&& !haveRWvolume
)
2025 if (canfork
&& !debug
&& Fork() != 0) {
2026 (void)Wait("Salvage volume group");
2029 for (i
= 0, totalInodes
= 0; i
< nVols
; i
++)
2030 totalInodes
+= isp
[i
].nInodes
;
2031 size
= totalInodes
* sizeof(struct ViceInodeInfo
);
2032 inodes
= (struct ViceInodeInfo
*)malloc(size
);
2033 allInodes
= inodes
- isp
->index
; /* this would the base of all the inodes
2034 * for the partition, if all the inodes
2035 * had been read into memory */
2036 osi_Assert(afs_lseek
2037 (salvinfo
->inodeFd
, isp
->index
* sizeof(struct ViceInodeInfo
),
2039 osi_Assert(read(salvinfo
->inodeFd
, inodes
, size
) == size
);
2041 /* Don't try to salvage a read write volume if there isn't one on this
2043 salvageTo
= haveRWvolume
? 0 : 1;
2045 #ifdef AFS_NAMEI_ENV
2046 ino
= FindLinkHandle(isp
, nVols
, allInodes
);
2047 if (VALID_INO(ino
)) {
2048 IH_INIT(salvinfo
->VGLinkH
, salvinfo
->fileSysDevice
, isp
->RWvolumeId
, ino
);
2049 fdP
= IH_OPEN(salvinfo
->VGLinkH
);
2051 if (VALID_INO(ino
) && fdP
!= NULL
) {
2052 struct versionStamp header
;
2053 afs_sfsize_t nBytes
;
2055 nBytes
= FDH_PREAD(fdP
, (char *)&header
, sizeof(struct versionStamp
), 0);
2056 if (nBytes
!= sizeof(struct versionStamp
)
2057 || header
.magic
!= LINKTABLEMAGIC
) {
2058 Log("Bad linktable header for volume %u.\n", isp
->RWvolumeId
);
2059 FDH_REALLYCLOSE(fdP
);
2063 if (!VALID_INO(ino
) || fdP
== NULL
) {
2064 Log("%s link table for volume %u.\n",
2065 Testing
? "Would have recreated" : "Recreating", isp
->RWvolumeId
);
2067 IH_INIT(salvinfo
->VGLinkH
, salvinfo
->fileSysDevice
, -1, -1);
2070 struct ViceInodeInfo
*ip
;
2071 CreateLinkTable(salvinfo
, isp
, ino
);
2072 fdP
= IH_OPEN(salvinfo
->VGLinkH
);
2073 /* Sync fake 1 link counts to the link table, now that it exists */
2075 for (i
= 0; i
< nVols
; i
++) {
2076 ip
= allInodes
+ isp
[i
].index
;
2077 for (j
= isp
[i
].nSpecialInodes
; j
< isp
[i
].nInodes
; j
++) {
2078 namei_SetLinkCount(fdP
, ip
[j
].inodeNumber
, 1, 1);
2079 ip
[j
].linkCount
= 1;
2086 FDH_REALLYCLOSE(fdP
);
2088 IH_INIT(salvinfo
->VGLinkH
, salvinfo
->fileSysDevice
, -1, -1);
2091 /* Salvage in reverse order--read/write volume last; this way any
2092 * Inodes not referenced by the time we salvage the read/write volume
2093 * can be picked up by the read/write volume */
2094 /* ACTUALLY, that's not done right now--the inodes just vanish */
2095 for (i
= nVols
- 1; i
>= salvageTo
; i
--) {
2097 struct InodeSummary
*lisp
= &isp
[i
];
2098 #ifdef AFS_NAMEI_ENV
2099 if (rw
&& (nVols
> 1 || isp
[i
].nSpecialInodes
== isp
[i
].nInodes
)) {
2100 /* If nVols > 1, we have more than one vol in this volgroup, so
2101 * the RW inodes we detected may just be for the linktable, and
2102 * there is no actual RW volume.
2104 * Additionally, if we only have linktable inodes (no other
2105 * special inodes, no data inodes), there is also no actual RW
2106 * volume to salvage; this is just cruft left behind by something
2107 * else. In that case nVols will only be 1, though, so also
2108 * perform this linktables-only check if we don't have any
2109 * non-special inodes. */
2111 int all_linktables
= 1;
2112 for (inode_i
= 0; inode_i
< isp
[i
].nSpecialInodes
; inode_i
++) {
2113 if (inodes
[inode_i
].u
.special
.type
!= VI_LINKTABLE
) {
2118 if (all_linktables
) {
2119 /* All we have are linktable special inodes, so skip salvaging
2120 * the RW; there was never an RW volume here. If we don't do
2121 * this, we risk creating a new "phantom" RW that the VLDB
2122 * doesn't know about, which is confusing and can cause
2130 Log("%s VOLUME %u%s.\n", rw
? "SALVAGING" : "CHECKING CLONED",
2131 lisp
->volumeId
, (Testing
? "(READONLY mode)" : ""));
2132 /* Check inodes twice. The second time do things seriously. This
2133 * way the whole RO volume can be deleted, below, if anything goes wrong */
2134 for (check
= 1; check
>= 0; check
--) {
2136 if (SalvageVolumeHeaderFile(salvinfo
, lisp
, allInodes
, rw
, check
, &deleteMe
)
2138 MaybeZapVolume(salvinfo
, lisp
, "Volume header", deleteMe
, check
);
2139 if (rw
&& deleteMe
) {
2140 haveRWvolume
= 0; /* This will cause its inodes to be deleted--since salvage
2141 * volume won't be called */
2147 if (rw
&& check
== 1)
2149 if (SalvageVnodes(salvinfo
, isp
, lisp
, allInodes
, check
) == -1) {
2150 MaybeZapVolume(salvinfo
, lisp
, "Vnode index", 0, check
);
2156 /* Fix actual inode counts */
2159 Log("totalInodes %d\n",totalInodes
);
2160 for (ip
= inodes
; totalInodes
; ip
++, totalInodes
--) {
2161 static int TraceBadLinkCounts
= 0;
2162 #ifdef AFS_NAMEI_ENV
2163 if (salvinfo
->VGLinkH
->ih_ino
== ip
->inodeNumber
) {
2164 dec_VGLinkH
= ip
->linkCount
- salvinfo
->VGLinkH_cnt
;
2165 VGLinkH_p1
= ip
->u
.param
[0];
2166 continue; /* Deal with this last. */
2167 } else if (CheckDupLinktable(salvinfo
, isp
, ip
)) {
2168 /* Don't touch this inode; CheckDupLinktable has handled it */
2172 if (ip
->linkCount
!= 0 && TraceBadLinkCounts
) {
2173 TraceBadLinkCounts
--; /* Limit reports, per volume */
2174 Log("#### DEBUG #### Link count incorrect by %d; inode %s, size %llu, p=(%u,%u,%u,%u)\n", ip
->linkCount
, PrintInode(stmp
, ip
->inodeNumber
), (afs_uintmax_t
) ip
->byteCount
, ip
->u
.param
[0], ip
->u
.param
[1], ip
->u
.param
[2], ip
->u
.param
[3]);
2176 while (ip
->linkCount
> 0) {
2177 /* below used to assert, not break */
2179 if (IH_DEC(salvinfo
->VGLinkH
, ip
->inodeNumber
, ip
->u
.param
[0])) {
2180 Log("idec failed. inode %s errno %d\n",
2181 PrintInode(stmp
, ip
->inodeNumber
), errno
);
2187 while (ip
->linkCount
< 0) {
2188 /* these used to be asserts */
2190 if (IH_INC(salvinfo
->VGLinkH
, ip
->inodeNumber
, ip
->u
.param
[0])) {
2191 Log("iinc failed. inode %s errno %d\n",
2192 PrintInode(stmp
, ip
->inodeNumber
), errno
);
2199 #ifdef AFS_NAMEI_ENV
2200 while (dec_VGLinkH
> 0) {
2201 if (IH_DEC(salvinfo
->VGLinkH
, salvinfo
->VGLinkH
->ih_ino
, VGLinkH_p1
) < 0) {
2202 Log("idec failed on link table, errno = %d\n", errno
);
2206 while (dec_VGLinkH
< 0) {
2207 if (IH_INC(salvinfo
->VGLinkH
, salvinfo
->VGLinkH
->ih_ino
, VGLinkH_p1
) < 0) {
2208 Log("iinc failed on link table, errno = %d\n", errno
);
2215 /* Directory consistency checks on the rw volume */
2217 SalvageVolume(salvinfo
, isp
, salvinfo
->VGLinkH
);
2218 IH_RELEASE(salvinfo
->VGLinkH
);
2220 if (canfork
&& !debug
) {
2227 QuickCheck(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
, int nVols
)
2229 /* Check headers BEFORE forking */
2233 for (i
= 0; i
< nVols
; i
++) {
2234 struct VolumeSummary
*vs
= isp
[i
].volSummary
;
2235 VolumeDiskData volHeader
;
2237 /* Don't salvage just because phantom rw volume is there... */
2238 /* (If a read-only volume exists, read/write inodes must also exist) */
2239 if (i
== 0 && isp
->nSpecialInodes
== 0 && nVols
> 1)
2243 IH_INIT(h
, salvinfo
->fileSysDevice
, vs
->header
.parent
, vs
->header
.volumeInfo
);
2244 if (IH_IREAD(h
, 0, (char *)&volHeader
, sizeof(volHeader
))
2245 == sizeof(volHeader
)
2246 && volHeader
.stamp
.magic
== VOLUMEINFOMAGIC
2247 && volHeader
.dontSalvage
== DONT_SALVAGE
2248 && volHeader
.needsSalvaged
== 0 && volHeader
.destroyMe
== 0) {
2249 if (volHeader
.inUse
!= 0) {
2250 volHeader
.inUse
= 0;
2251 volHeader
.inService
= 1;
2253 if (IH_IWRITE(h
, 0, (char *)&volHeader
, sizeof(volHeader
))
2254 != sizeof(volHeader
)) {
2270 /* SalvageVolumeHeaderFile
2272 * Salvage the top level V*.vol header file. Make sure the special files
2273 * exist and that there are no duplicates.
2275 * Calls SalvageHeader for each possible type of volume special file.
/*
 * SalvageVolumeHeaderFile -- rebuild an in-memory volume header
 * (tempHeader) for the volume described by isp from the special inodes
 * found in inodes[], salvage each special file, and rewrite the on-disk
 * header file when it is missing or no longer matches.
 *
 * Parameters as used in the visible code:
 *   isp      - summary of the volume; isp->index / isp->nSpecialInodes
 *              select its special inodes inside inodes[].
 *   RW       - when 0 (together with !check and an existing volSummary)
 *              ClearROInUseBit() is called on the summary.
 *   check    - selects the "check only" wording in log messages and is
 *              forwarded to SalvageHeader().
 *   deleteMe - forwarded unchanged to SalvageHeader().
 *
 * Visible steps, in order:
 *  - allocate a skip[] array with one slot per special inode; failure is
 *    logged and the salvage continues without duplicate recovery;
 *  - when a header summary exists, note in goodspecial[] which special
 *    inodes the existing header already references;
 *  - reset tempHeader (magic, version, id, parent) and look at adjacent
 *    special inodes for duplicate types, logging which one is used;
 *  - walk the special inodes again: out-of-range types are logged as
 *    "Rubbish header inode", other non-obsolete ones are stored through
 *    stuff[type - 1].inode;
 *  - call SalvageHeader() for each of the MAXINODETYPE stuff[] entries;
 *  - pick VCreateVolumeDiskHeader (no header summary) or
 *    VWriteVolumeDiskHeader (stored header reported as damaged), convert
 *    tempHeader with VolumeHeaderToDisk() and call the chosen function;
 *  - initialise isp->volSummary->volumeInfoHandle.
 *
 * NOTE(review): many original lines are absent from this listing (return
 * type, braces, several declarations, the skip[] assignments, most
 * else/return statements, and the name of the call that compares the
 * stored header with tempHeader).  Return values are therefore not
 * documented here -- confirm against the complete file.
 */
2279 SalvageVolumeHeaderFile(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
,
2280 struct ViceInodeInfo
*inodes
, int RW
,
2281 int check
, int *deleteMe
)
2284 struct ViceInodeInfo
*ip
;
2285 int allinodesobsolete
= 1;
2286 struct VolumeDiskHeader diskHeader
;
/* Chosen later: VCreateVolumeDiskHeader or VWriteVolumeDiskHeader. */
2287 afs_int32 (*writefunc
)(VolumeDiskHeader_t
*, struct DiskPartition64
*) = NULL
;
2289 struct VolumeHeader tempHeader
;
2290 struct afs_inode_info stuff
[MAXINODETYPE
];
2292 /* keeps track of special inodes that are probably 'good'; they are
2293 * referenced in the vol header, and are included in the given inodes
2298 } goodspecial
[MAXINODETYPE
];
2303 memset(goodspecial
, 0, sizeof(goodspecial
));
2305 skip
= malloc(isp
->nSpecialInodes
* sizeof(*skip
));
2307 memset(skip
, 0, isp
->nSpecialInodes
* sizeof(*skip
));
2309 Log("cannot allocate memory for inode skip array when salvaging "
2310 "volume %lu; not performing duplicate special inode recovery\n",
2311 afs_printable_uint32_lu(isp
->volumeId
));
2312 /* still try to perform the salvage; the skip array only does anything
2313 * if we detect duplicate special inodes */
/* NOTE(review): presumably points each stuff[] entry at the matching
 * field of tempHeader -- confirm in init_inode_info(). */
2316 init_inode_info(&tempHeader
, stuff
);
2319 * First, look at the special inodes and see if any are referenced by
2320 * the existing volume header. If we find duplicate special inodes, we
2321 * can use this information to use the referenced inode (it's more
2322 * likely to be the 'good' one), and throw away the duplicates.
2324 if (isp
->volSummary
&& skip
) {
2325 /* use tempHeader, so we can use the stuff[] array to easily index
2326 * into the isp->volSummary special inodes */
2327 memcpy(&tempHeader
, &isp
->volSummary
->header
, sizeof(struct VolumeHeader
));
2329 for (i
= 0; i
< isp
->nSpecialInodes
; i
++) {
2330 ip
= &inodes
[isp
->index
+ i
];
2331 if (ip
->u
.special
.type
<= 0 || ip
->u
.special
.type
> MAXINODETYPE
) {
2332 /* will get taken care of in a later loop */
/* This inode is the one the existing header names for its type. */
2335 if (ip
->inodeNumber
== *(stuff
[ip
->u
.special
.type
- 1].inode
)) {
2336 goodspecial
[ip
->u
.special
.type
-1].valid
= 1;
2337 goodspecial
[ip
->u
.special
.type
-1].inode
= ip
->inodeNumber
;
/* Start again from an empty header; it is refilled from the inodes below. */
2342 memset(&tempHeader
, 0, sizeof(tempHeader
));
2343 tempHeader
.stamp
.magic
= VOLUMEHEADERMAGIC
;
2344 tempHeader
.stamp
.version
= VOLUMEHEADERVERSION
;
2345 tempHeader
.id
= isp
->volumeId
;
2346 tempHeader
.parent
= isp
->RWvolumeId
;
2348 /* Check for duplicates (inodes are sorted by type field) */
2349 for (i
= 0; i
< isp
->nSpecialInodes
- 1; i
++) {
2350 ip
= &inodes
[isp
->index
+ i
];
2351 if (ip
->u
.special
.type
== (ip
+ 1)->u
.special
.type
) {
2352 afs_ino_str_t stmp1
, stmp2
;
2354 if (ip
->u
.special
.type
<= 0 || ip
->u
.special
.type
> MAXINODETYPE
) {
2355 /* Will be caught in the loop below */
2359 Log("Duplicate special %d inodes for volume %u found (%s, %s);\n",
2360 ip
->u
.special
.type
, isp
->volumeId
,
2361 PrintInode(stmp1
, ip
->inodeNumber
),
2362 PrintInode(stmp2
, (ip
+1)->inodeNumber
));
2364 if (skip
&& goodspecial
[ip
->u
.special
.type
-1].valid
) {
2365 Inode gi
= goodspecial
[ip
->u
.special
.type
-1].inode
;
2368 Log("using special inode referenced by vol header (%s)\n",
2369 PrintInode(stmp1
, gi
));
2372 /* the volume header references some special inode of
2373 * this type in the inodes array; are we it? */
2374 if (ip
->inodeNumber
!= gi
) {
2376 } else if ((ip
+1)->inodeNumber
!= gi
) {
2377 /* in case this is the last iteration; we need to
2378 * make sure we check ip+1, too */
2383 Log("cannot determine which is correct; salvage of volume %u aborted\n", isp
->volumeId
);
/* Second pass over the special inodes: report rubbish types and record
 * each usable inode number in tempHeader through stuff[]. */
2391 for (i
= 0; i
< isp
->nSpecialInodes
; i
++) {
2393 ip
= &inodes
[isp
->index
+ i
];
2394 if (ip
->u
.special
.type
<= 0 || ip
->u
.special
.type
> MAXINODETYPE
) {
2396 Log("Rubbish header inode %s of type %d\n",
2397 PrintInode(stmp
, ip
->inodeNumber
),
2398 ip
->u
.special
.type
);
2404 Log("Rubbish header inode %s of type %d; deleted\n",
2405 PrintInode(stmp
, ip
->inodeNumber
),
2406 ip
->u
.special
.type
);
2407 } else if (!stuff
[ip
->u
.special
.type
- 1].obsolete
) {
2408 if (skip
&& skip
[i
]) {
2409 if (orphans
== ORPH_REMOVE
) {
2410 Log("Removing orphan special inode %s of type %d\n",
2411 PrintInode(stmp
, ip
->inodeNumber
), ip
->u
.special
.type
);
2414 Log("Ignoring orphan special inode %s of type %d\n",
2415 PrintInode(stmp
, ip
->inodeNumber
), ip
->u
.special
.type
);
2416 /* fall through to the ip->linkCount--; line below */
2419 *(stuff
[ip
->u
.special
.type
- 1].inode
) = ip
->inodeNumber
;
2420 allinodesobsolete
= 0;
2422 if (!check
&& ip
->u
.special
.type
!= VI_LINKTABLE
)
2423 ip
->linkCount
--; /* Keep the inode around */
2431 if (allinodesobsolete
) {
2438 salvinfo
->VGLinkH_cnt
++; /* one for every header. */
2440 if (!RW
&& !check
&& isp
->volSummary
) {
2441 ClearROInUseBit(isp
->volSummary
);
/* Salvage every special file described by stuff[]. */
2445 for (i
= 0; i
< MAXINODETYPE
; i
++) {
2446 if (stuff
[i
].inodeType
== VI_LINKTABLE
) {
2447 /* Gross hack: SalvageHeader does a bcmp on the volume header.
2448 * And we may have recreated the link table earlier, so set the
2449 * RW header as well. The header magic was already checked.
2451 if (VALID_INO(salvinfo
->VGLinkH
->ih_ino
)) {
2452 *stuff
[i
].inode
= salvinfo
->VGLinkH
->ih_ino
;
2456 if (SalvageHeader(salvinfo
, &stuff
[i
], isp
, check
, deleteMe
) == -1 && check
)
2460 if (isp
->volSummary
== NULL
) {
2462 char headerName
[64];
2463 (void)afs_snprintf(headerName
, sizeof headerName
, VFORMAT
, afs_printable_uint32_lu(isp
->volumeId
));
2464 (void)afs_snprintf(path
, sizeof path
, "%s" OS_DIRSEP
"%s", salvinfo
->fileSysPath
, headerName
);
2466 Log("No header file for volume %u\n", isp
->volumeId
);
2470 Log("No header file for volume %u; %screating %s\n",
2471 isp
->volumeId
, (Testing
? "it would have been " : ""),
2473 isp
->volSummary
= calloc(1, sizeof(struct VolumeSummary
));
2475 writefunc
= VCreateVolumeDiskHeader
;
2478 char headerName
[64];
2479 /* hack: these two fields are obsolete... */
2480 isp
->volSummary
->header
.volumeAcl
= 0;
2481 isp
->volSummary
->header
.volumeMountTable
= 0;
/* NOTE(review): the name of the call comparing the stored header with
 * tempHeader is on a line missing from this listing. */
2484 (&isp
->volSummary
->header
, &tempHeader
,
2485 sizeof(struct VolumeHeader
))) {
2486 VolumeExternalName_r(isp
->volumeId
, headerName
, sizeof(headerName
));
2487 (void)afs_snprintf(path
, sizeof path
, "%s" OS_DIRSEP
"%s", salvinfo
->fileSysPath
, headerName
);
2489 Log("Header file %s is damaged or no longer valid%s\n", path
,
2490 (check
? "" : "; repairing"));
2494 writefunc
= VWriteVolumeDiskHeader
;
/* Adopt the rebuilt header as the summary's header. */
2498 memcpy(&isp
->volSummary
->header
, &tempHeader
,
2499 sizeof(struct VolumeHeader
));
2502 Log("It would have written a new header file for volume %u\n",
/* Convert to the on-disk layout and write it with the selected function. */
2506 VolumeHeaderToDisk(&diskHeader
, &tempHeader
);
2507 code
= (*writefunc
)(&diskHeader
, salvinfo
->fileSysPartition
);
2509 Log("Error %ld writing volume header file for volume %lu\n",
2510 afs_printable_int32_ld(code
),
2511 afs_printable_uint32_lu(diskHeader
.id
));
2516 IH_INIT(isp
->volSummary
->volumeInfoHandle
, salvinfo
->fileSysDevice
, isp
->RWvolumeId
,
2517 isp
->volSummary
->header
.volumeInfo
);
/*
 * SalvageHeader -- check, and where permitted recreate, the one volume
 * special file described by sp for the volume described by isp.
 *
 * Visible behaviour:
 *  - Without AFS_NAMEI_ENV, a VI_LINKTABLE entry returns 0 at once.
 *  - If *(sp->inode) is 0 the inode is reported missing and a new special
 *    inode is requested with IH_CREATE(); an invalid result is reported
 *    as "Unable to allocate inode".
 *  - The inode is opened.  With OKToZap set, a failed open whose errno
 *    satisfies BadError() is logged as "destroying volume"; another
 *    visible path calls Abort().
 *  - sp->size bytes are read at offset 0 and header.fileHeader.magic is
 *    compared with sp->stamp.magic; a short read or mismatch is logged
 *    as a corrupted header.
 *  - With AFS_NAMEI_ENV, namei_FixSpecialOGM(fdP, check) is also run.
 *  - If "recreate" is set and Testing is not, the file is truncated and
 *    rewritten: for VI_VOLINFO a zeroed VolumeDiskData is filled in (name
 *    "bogus.<volumeId>", inService = 0, blessed = 0, uniquifier =
 *    maxUniquifier + 1 + 1000, creationDate = now); the other visible
 *    write stores just sp->stamp.
 *  - For VI_VOLINFO, header.volumeInfo is copied to salvinfo->VolInfo and
 *    an "updated" / "not updated" line is logged.
 *
 * NOTE(review): the return type, braces, the declarations of header,
 * recreate, fdP, specH, nBytes, tp and update, the else arms and all but
 * one return statement are missing from this listing.  The caller in this
 * file compares the result with -1; the remaining return convention must
 * be confirmed against the complete file.
 */
2522 SalvageHeader(struct SalvInfo
*salvinfo
, struct afs_inode_info
*sp
,
2523 struct InodeSummary
*isp
, int check
, int *deleteMe
)
2526 VolumeDiskData volumeInfo
;
2527 struct versionStamp fileHeader
;
2536 #ifndef AFS_NAMEI_ENV
2537 if (sp
->inodeType
== VI_LINKTABLE
)
2538 return 0; /* header magic was already checked */
/* The volume header does not name an inode for this special file. */
2540 if (*(sp
->inode
) == 0) {
2542 Log("Missing inode in volume header (%s)\n", sp
->description
);
2546 Log("Missing inode in volume header (%s); %s\n", sp
->description
,
2547 (Testing
? "it would have recreated it" : "recreating"));
2550 IH_CREATE(NULL
, salvinfo
->fileSysDevice
, salvinfo
->fileSysPath
, 0, isp
->volumeId
,
2551 INODESPECIAL
, sp
->inodeType
, isp
->RWvolumeId
);
2552 if (!VALID_INO(*(sp
->inode
)))
2554 ("Unable to allocate inode (%s) for volume header (error = %d)\n",
2555 sp
->description
, errno
);
2560 IH_INIT(specH
, salvinfo
->fileSysDevice
, isp
->RWvolumeId
, *(sp
->inode
));
2561 fdP
= IH_OPEN(specH
);
2562 if (OKToZap
&& (fdP
== NULL
) && BadError(errno
)) {
2563 /* bail out early and destroy the volume */
2565 Log("Still can't open volume header inode (%s), destroying volume\n", sp
->description
);
2572 Abort("Unable to open inode (%s) of volume header (error = %d)\n",
2573 sp
->description
, errno
);
/* Short read or wrong magic number means this special file is corrupt. */
2576 && (FDH_PREAD(fdP
, (char *)&header
, sp
->size
, 0) != sp
->size
2577 || header
.fileHeader
.magic
!= sp
->stamp
.magic
)) {
2579 Log("Part of the header (%s) is corrupted\n", sp
->description
);
2580 FDH_REALLYCLOSE(fdP
);
2584 Log("Part of the header (%s) is corrupted; recreating\n",
2587 /* header can be garbage; make sure we don't read garbage data from
2589 memset(&header
, 0, sizeof(header
));
2591 #ifdef AFS_NAMEI_ENV
2592 if (namei_FixSpecialOGM(fdP
, check
)) {
2593 Log("Error with namei header OGM data (%s)\n", sp
->description
);
2594 FDH_REALLYCLOSE(fdP
);
2599 if (sp
->inodeType
== VI_VOLINFO
2600 && header
.volumeInfo
.destroyMe
== DESTROY_ME
) {
2603 FDH_REALLYCLOSE(fdP
);
2607 if (recreate
&& !Testing
) {
2610 ("Internal error: recreating volume header (%s) in check mode\n",
2612 nBytes
= FDH_TRUNC(fdP
, 0);
2614 Abort("Unable to truncate volume header file (%s) (error = %d)\n",
2615 sp
->description
, errno
);
2617 /* The following code should be moved into vutil.c */
/* Recreate the volume info file with placeholder contents. */
2618 if (sp
->inodeType
== VI_VOLINFO
) {
2620 memset(&header
.volumeInfo
, 0, sizeof(header
.volumeInfo
));
2621 header
.volumeInfo
.stamp
= sp
->stamp
;
2622 header
.volumeInfo
.id
= isp
->volumeId
;
2623 header
.volumeInfo
.parentId
= isp
->RWvolumeId
;
2624 sprintf(header
.volumeInfo
.name
, "bogus.%u", isp
->volumeId
);
2625 Log("Warning: the name of volume %u is now \"bogus.%u\"\n",
2626 isp
->volumeId
, isp
->volumeId
);
2627 header
.volumeInfo
.inService
= 0;
2628 header
.volumeInfo
.blessed
= 0;
2629 /* The + 1000 is a hack in case there are any files out in venus caches */
2630 header
.volumeInfo
.uniquifier
= (isp
->maxUniquifier
+ 1) + 1000;
2631 header
.volumeInfo
.type
= (isp
->volumeId
== isp
->RWvolumeId
? readwriteVolume
: readonlyVolume
); /* XXXX */
2632 header
.volumeInfo
.needsCallback
= 0;
2633 gettimeofday(&tp
, 0);
2634 header
.volumeInfo
.creationDate
= tp
.tv_sec
;
2636 FDH_PWRITE(fdP
, (char *)&header
.volumeInfo
,
2637 sizeof(header
.volumeInfo
), 0);
2638 if (nBytes
!= sizeof(header
.volumeInfo
)) {
2641 ("Unable to write volume header file (%s) (errno = %d)\n",
2642 sp
->description
, errno
);
2643 Abort("Unable to write entire volume header file (%s)\n",
/* This write stores only the version stamp at offset 0. */
2647 nBytes
= FDH_PWRITE(fdP
, (char *)&sp
->stamp
, sizeof(sp
->stamp
), 0);
2648 if (nBytes
!= sizeof(sp
->stamp
)) {
2651 ("Unable to write version stamp in volume header file (%s) (errno = %d)\n",
2652 sp
->description
, errno
);
2654 ("Unable to write entire version stamp in volume header file (%s)\n",
2659 FDH_REALLYCLOSE(fdP
);
/* Keep a copy of the volume info in the salvage state and log its age. */
2661 if (sp
->inodeType
== VI_VOLINFO
) {
2662 salvinfo
->VolInfo
= header
.volumeInfo
;
2666 if (salvinfo
->VolInfo
.updateDate
) {
2667 strcpy(update
, TimeStamp(salvinfo
->VolInfo
.updateDate
, 0));
2669 Log("%s (%u) %supdated %s\n", salvinfo
->VolInfo
.name
,
2670 salvinfo
->VolInfo
.id
,
2671 (Testing
? "it would have been " : ""), update
);
2673 strcpy(update
, TimeStamp(salvinfo
->VolInfo
.creationDate
, 0));
2675 Log("%s (%u) not updated (created %s)\n",
2676 salvinfo
->VolInfo
.name
, salvinfo
->VolInfo
.id
, update
);
/*
 * SalvageVnodes -- run SalvageIndex() over the small and then the large
 * vnode index of thisIsp, handing it the non-special inodes that belong
 * to the read/write volume rwIsp.
 *
 *   rwIsp   - summary of the read/write volume; its inodes that follow
 *             the special inodes (starting at ioffset, nInodes of them)
 *             are the candidates matched against vnodes.
 *   thisIsp - volume whose vnode indexes are salvaged; RW is true when it
 *             is the same summary as rwIsp.
 *   inodes  - inode array indexed via rwIsp->index.
 *   check   - forwarded to SalvageIndex().
 *
 * The final visible statement returns 0 only when both ilarge and ismall
 * are 0, and -1 otherwise.
 *
 * NOTE(review): the lines that assign ismall / ilarge from the two
 * SalvageIndex() calls, and the statement controlled by
 * "check && ismall == -1", are missing from this listing; other early
 * returns may be missing as well.
 */
2686 SalvageVnodes(struct SalvInfo
*salvinfo
,
2687 struct InodeSummary
*rwIsp
,
2688 struct InodeSummary
*thisIsp
,
2689 struct ViceInodeInfo
*inodes
, int check
)
2691 int ilarge
, ismall
, ioffset
, RW
, nInodes
;
2692 ioffset
= rwIsp
->index
+ rwIsp
->nSpecialInodes
; /* first inode */
2695 RW
= (rwIsp
== thisIsp
);
2696 nInodes
= (rwIsp
->nInodes
- rwIsp
->nSpecialInodes
);
/* Small vnode index first. */
2698 SalvageIndex(salvinfo
, thisIsp
->volSummary
->header
.smallVnodeIndex
, vSmall
, RW
,
2699 &inodes
[ioffset
], nInodes
, thisIsp
->volSummary
, check
);
2700 if (check
&& ismall
== -1)
/* Then the large vnode index, with the same inode slice. */
2703 SalvageIndex(salvinfo
, thisIsp
->volSummary
->header
.largeVnodeIndex
, vLarge
, RW
,
2704 &inodes
[ioffset
], nInodes
, thisIsp
->volSummary
, check
);
2705 return (ilarge
== 0 && ismall
== 0 ? 0 : -1);
/*
 * SalvageIndex -- walk one vnode index file (inode ino, vnode class
 * "class") and reconcile every non-null vnode with the inode list ip[]
 * (nInodes entries).
 *
 * Visible behaviour:
 *  - The index is opened through an inode handle and as a stream; its
 *    size must be an exact multiple of vcp->diskSize (asserted).  The
 *    stream is positioned at offset vcp->diskSize before reading, so
 *    nVnodes is size / diskSize - 1.
 *  - For each vnode whose type is not vNull:
 *      * a vnode with inode number 0, or with a magic number different
 *        from vcp->magic, is logged and zeroed in some paths;
 *      * inode entries with a lower vnodeNumber are stepped over;
 *      * among inode entries with the same vnodeNumber, one whose inode
 *        number equals the vnode's is looked for;
 *      * with a matching inode, the vnode's uniquifier, dataVersion,
 *        inode number and length are compared with the inode's values and
 *        corrected (or only reported), and ip->linkCount is decremented
 *        to keep the inode;
 *      * with no matching inode, a vnode that lists an inode number or is
 *        a directory is reported and zeroed.
 *  - A changed vnode is written back with IH_IWRITE() unless Testing is
 *    set, and salvinfo->VolumeChanged is set to 1.  The function asserts
 *    that nothing was changed while "check" is set.
 *
 * NOTE(review): this listing is missing many original lines (return type,
 * braces, several declarations, the vnodeChanged / err bookkeeping, the
 * goto/else structure and all return statements).  The control flow
 * between the fragments below cannot be fully reconstructed from here.
 */
2709 SalvageIndex(struct SalvInfo
*salvinfo
, Inode ino
, VnodeClass
class, int RW
,
2710 struct ViceInodeInfo
*ip
, int nInodes
,
2711 struct VolumeSummary
*volSummary
, int check
)
2713 char buf
[SIZEOF_LARGEDISKVNODE
];
2714 struct VnodeDiskObject
*vnode
= (struct VnodeDiskObject
*)buf
;
2716 StreamHandle_t
*file
;
2717 struct VnodeClassInfo
*vcp
;
2719 afs_sfsize_t nVnodes
;
2720 afs_fsize_t vnodeLength
;
2722 afs_ino_str_t stmp1
, stmp2
;
2726 IH_INIT(handle
, salvinfo
->fileSysDevice
, volSummary
->header
.parent
, ino
);
2727 fdP
= IH_OPEN(handle
);
2728 osi_Assert(fdP
!= NULL
);
2729 file
= FDH_FDOPEN(fdP
, "r+");
2730 osi_Assert(file
!= NULL
);
2731 vcp
= &VnodeClassInfo
[class];
2732 size
= OS_SIZE(fdP
->fd_fd
);
2733 osi_Assert(size
!= -1);
/* One record of the file is not a vnode: reading starts at offset
 * vcp->diskSize (see the seek below), hence the "- 1". */
2734 nVnodes
= (size
/ vcp
->diskSize
) - 1;
2736 osi_Assert((nVnodes
+ 1) * vcp
->diskSize
== size
);
2737 osi_Assert(STREAM_ASEEK(file
, vcp
->diskSize
) == 0);
2741 for (vnodeIndex
= 0;
2742 nVnodes
&& STREAM_READ(vnode
, vcp
->diskSize
, 1, file
) == 1;
2743 nVnodes
--, vnodeIndex
++) {
2744 if (vnode
->type
!= vNull
) {
2745 int vnodeChanged
= 0;
2746 int vnodeNumber
= bitNumberToVnodeNumber(vnodeIndex
, class);
2747 /* Log programs that belong to root (potentially suid root);
2748 * don't bother for read-only or backup volumes */
2749 #ifdef notdef /* This is done elsewhere */
2750 if (ShowRootFiles
&& RW
&& vnode
->owner
== 0 && vnodeNumber
!= 1)
2751 Log("OWNER IS ROOT %s %u dir %u vnode %u author %u owner %u mode %o\n", salvinfo
->VolInfo
.name
, volumeNumber
, vnode
->parent
, vnodeNumber
, vnode
->author
, vnode
->owner
, vnode
->modeBits
);
2753 if (VNDISK_GET_INO(vnode
) == 0) {
2755 /* Log("### DEBUG ### Deleted Vnode with 0 inode (vnode %d)\n", vnodeNumber); */
2756 memset(vnode
, 0, vcp
->diskSize
);
2760 if (vcp
->magic
!= vnode
->vnodeMagic
) {
2761 /* bad magic #, probably partially created vnode */
2763 Log("Partially allocated vnode %d: bad magic (is %lx should be %lx)\n",
2764 vnodeNumber
, afs_printable_uint32_lu(vnode
->vnodeMagic
),
2765 afs_printable_uint32_lu(vcp
->magic
));
2766 memset(vnode
, 0, vcp
->diskSize
);
2770 Log("Partially allocated vnode %d deleted.\n",
2772 memset(vnode
, 0, vcp
->diskSize
);
2776 /* ****** Should do a bit more salvage here: e.g. make sure
2777 * vnode type matches what it should be given the index */
2778 while (nInodes
&& ip
->u
.vnode
.vnodeNumber
< vnodeNumber
) {
2779 /* if (vnodeIdToClass(ip->u.vnode.vnodeNumber) == class && RW) {
2780 * Log("Inode %d: says it belongs to non-existing vnode %d\n",
2781 * ip->inodeNumber, ip->u.vnode.vnodeNumber);
2788 while (nInodes
&& ip
->u
.vnode
.vnodeNumber
== vnodeNumber
) {
2789 /* The following doesn't work, because the version number
2790 * is not maintained correctly by the file server */
2791 /*if (vnode->uniquifier == ip->u.vnode.vnodeUniquifier &&
2792 * vnode->dataVersion == ip->u.vnode.inodeDataVersion)
2794 if (VNDISK_GET_INO(vnode
) == ip
->inodeNumber
)
2800 /* For RW volume, look for vnode with matching inode number;
2801 * if no such match, take the first determined by our sort
2803 struct ViceInodeInfo
*lip
= ip
;
2804 int lnInodes
= nInodes
;
2806 && lip
->u
.vnode
.vnodeNumber
== vnodeNumber
) {
2807 if (VNDISK_GET_INO(vnode
) == lip
->inodeNumber
) {
2816 if (nInodes
&& ip
->u
.vnode
.vnodeNumber
== vnodeNumber
) {
2817 /* "Matching" inode */
2821 vu
= vnode
->uniquifier
;
2822 iu
= ip
->u
.vnode
.vnodeUniquifier
;
2823 vd
= vnode
->dataVersion
;
2824 id
= ip
->u
.vnode
.inodeDataVersion
;
2826 * Because of the possibility of the uniquifier overflows (> 4M)
2827 * we compare them modulo the low 22-bits; we shouldn't worry
2828 * about mismatching since they shouldn't to many old
2829 * uniquifiers of the same vnode...
2831 if (IUnique(vu
) != IUnique(iu
)) {
2833 Log("Vnode %u: vnode.unique, %u, does not match inode unique, %u; fixed, but status will be wrong\n", vnodeNumber
, IUnique(vu
), IUnique(iu
));
2836 vnode
->uniquifier
= iu
;
2837 #ifdef AFS_3DISPARES
2838 vnode
->dataVersion
= (id
>= vd
?
2841 1887437 ? vd
: id
) :
2844 1887437 ? id
: vd
));
2846 #if defined(AFS_SGI_EXMAG)
2847 vnode
->dataVersion
= (id
>= vd
?
2850 15099494 ? vd
: id
) :
2853 15099494 ? id
: vd
));
/* Default build: keep the larger of the two data versions. */
2855 vnode
->dataVersion
= (id
> vd
? id
: vd
);
2856 #endif /* AFS_SGI_EXMAG */
2857 #endif /* AFS_3DISPARES */
2860 /* don't bother checking for vd > id any more, since
2861 * partial file transfers always result in this state,
2862 * and you can't do much else anyway (you've already
2863 * found the best data you can) */
2864 #ifdef AFS_3DISPARES
2865 if (!vnodeIsDirectory(vnodeNumber
)
2866 && ((vd
< id
&& (id
- vd
) < 1887437)
2867 || ((vd
> id
&& (vd
- id
) > 1887437)))) {
2869 #if defined(AFS_SGI_EXMAG)
2870 if (!vnodeIsDirectory(vnodeNumber
)
2871 && ((vd
< id
&& (id
- vd
) < 15099494)
2872 || ((vd
> id
&& (vd
- id
) > 15099494)))) {
2874 if (!vnodeIsDirectory(vnodeNumber
) && vd
< id
) {
2875 #endif /* AFS_SGI_EXMAG */
2878 Log("Vnode %d: version < inode version; fixed (old status)\n", vnodeNumber
);
2879 vnode
->dataVersion
= id
;
/* The vnode names a different inode than the one selected for it. */
2884 if (ip
->inodeNumber
!= VNDISK_GET_INO(vnode
)) {
2887 Log("Vnode %d: inode number incorrect (is %s should be %s). FileSize=%llu\n", vnodeNumber
, PrintInode(stmp1
, VNDISK_GET_INO(vnode
)), PrintInode(stmp2
, ip
->inodeNumber
), (afs_uintmax_t
) ip
->byteCount
);
2889 VNDISK_SET_INO(vnode
, ip
->inodeNumber
);
2894 Log("Vnode %d: inode number incorrect; changed from %s to %s. FileSize=%llu\n", vnodeNumber
, PrintInode(stmp1
, VNDISK_GET_INO(vnode
)), PrintInode(stmp2
, ip
->inodeNumber
), (afs_uintmax_t
) ip
->byteCount
);
2896 VNDISK_SET_INO(vnode
, ip
->inodeNumber
);
/* The length recorded in the vnode must equal the inode's byte count. */
2899 VNDISK_GET_LEN(vnodeLength
, vnode
);
2900 if (ip
->byteCount
!= vnodeLength
) {
2903 Log("Vnode %d: length incorrect; (is %llu should be %llu)\n", vnodeNumber
, (afs_uintmax_t
) vnodeLength
, (afs_uintmax_t
) ip
->byteCount
);
2908 Log("Vnode %d: length incorrect; changed from %llu to %llu\n", vnodeNumber
, (afs_uintmax_t
) vnodeLength
, (afs_uintmax_t
) ip
->byteCount
);
2909 VNDISK_SET_LEN(vnode
, ip
->byteCount
);
2913 ip
->linkCount
--; /* Keep the inode around */
2916 } else { /* no matching inode */
2918 if (VNDISK_GET_INO(vnode
) != 0
2919 || vnode
->type
== vDirectory
) {
2920 /* No matching inode--get rid of the vnode */
2922 if (VNDISK_GET_INO(vnode
)) {
2924 Log("Vnode %d (unique %u): corresponding inode %s is missing\n", vnodeNumber
, vnode
->uniquifier
, PrintInode(stmp
, VNDISK_GET_INO(vnode
)));
2928 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed)\n", vnodeNumber
, vnode
->uniquifier
);
2933 if (VNDISK_GET_INO(vnode
)) {
2935 time_t serverModifyTime
= vnode
->serverModifyTime
;
2936 Log("Vnode %d (unique %u): corresponding inode %s is missing; vnode deleted, vnode mod time=%s", vnodeNumber
, vnode
->uniquifier
, PrintInode(stmp
, VNDISK_GET_INO(vnode
)), ctime(&serverModifyTime
));
2940 time_t serverModifyTime
= vnode
->serverModifyTime
;
2941 Log("Vnode %d (unique %u): bad directory vnode (no inode number listed); vnode deleted, vnode mod time=%s", vnodeNumber
, vnode
->uniquifier
, ctime(&serverModifyTime
));
2944 memset(vnode
, 0, vcp
->diskSize
);
2947 /* Should not reach here because we checked for
2948 * (inodeNumber == 0) above. And where we zero the vnode,
2949 * we also goto vnodeDone.
2953 while (nInodes
&& ip
->u
.vnode
.vnodeNumber
== vnodeNumber
) {
2957 } /* VNDISK_GET_INO(vnode) != 0 */
/* Check mode must never have modified a vnode. */
2959 osi_Assert(!(vnodeChanged
&& check
));
2960 if (vnodeChanged
&& !Testing
) {
2961 osi_Assert(IH_IWRITE
2962 (handle
, vnodeIndexOffset(vcp
, vnodeNumber
),
2963 (char *)vnode
, vcp
->diskSize
)
2965 salvinfo
->VolumeChanged
= 1; /* For break call back */
/*
 * CheckVnodeNumber -- look up the VnodeEssence entry for vnodeNumber.
 *
 * The vnode number is mapped to a vnode class (vnodeIdToClass) and to an
 * offset within that class (vnodeIdToBitNumber).  Returns a pointer into
 * salvinfo->vnodeInfo[class].vnodes[], or NULL when the offset is not
 * below that class's nVnodes.
 *
 * NOTE(review): the declarations of "class" and "offset" are on lines
 * missing from this listing; the signedness of "offset" affects the
 * range comparison, so confirm it against the complete file.
 */
2976 struct VnodeEssence
*
2977 CheckVnodeNumber(struct SalvInfo
*salvinfo
, VnodeId vnodeNumber
)
2980 struct VnodeInfo
*vip
;
2983 class = vnodeIdToClass(vnodeNumber
);
2984 vip
= &salvinfo
->vnodeInfo
[class];
2985 offset
= vnodeIdToBitNumber(vnodeNumber
);
/* Out-of-range offsets yield NULL rather than an out-of-bounds pointer. */
2986 return (offset
>= vip
->nVnodes
? NULL
: &vip
->vnodes
[offset
]);
/*
 * CopyOnWrite -- give directory "dir" a fresh inode holding a copy of its
 * current contents before the salvager modifies it.
 *
 * Visible behaviour: the first test involves dir->copied and Testing (its
 * controlled statement is not visible).  Otherwise DFlush() is called,
 * the directory's vnode is read from the large vnode index, a new inode
 * is created with IH_CREATE() (the argument list bumps
 * vnode.dataVersion), the old inode's data is copied with CopyInode(),
 * the vnode is pointed at the new inode and written back, and
 * dir->dirHandle is re-targeted at the new inode with
 * SetSalvageDirHandle().  Failures of the read, create, copy and write
 * steps are handled with osi_Assert().
 *
 * NOTE(review): the return type, braces, the declaration of "code", the
 * targets of the IH_IREAD / IH_CREATE / IH_IWRITE results, the amount
 * added to dataVersion and the tail of the function are on lines missing
 * from this listing.
 */
2990 CopyOnWrite(struct SalvInfo
*salvinfo
, struct DirSummary
*dir
)
2992 /* Copy the directory unconditionally if we are going to change it:
2993 * not just if was cloned.
2995 struct VnodeDiskObject vnode
;
2996 struct VnodeClassInfo
*vcp
= &VnodeClassInfo
[vLarge
];
2997 Inode oldinode
, newinode
;
3000 if (dir
->copied
|| Testing
)
3002 DFlush(); /* Well justified paranoia... */
/* Fetch the directory's own vnode from the large vnode index. */
3005 IH_IREAD(salvinfo
->vnodeInfo
[vLarge
].handle
,
3006 vnodeIndexOffset(vcp
, dir
->vnodeNumber
), (char *)&vnode
,
3008 osi_Assert(code
== sizeof(vnode
));
3009 oldinode
= VNDISK_GET_INO(&vnode
);
3010 /* Increment the version number by a whole lot to avoid problems with
3011 * clients that were promised new version numbers--but the file server
3012 * crashed before the versions were written to disk.
3015 IH_CREATE(dir
->ds_linkH
, salvinfo
->fileSysDevice
, salvinfo
->fileSysPath
, 0, dir
->rwVid
,
3016 dir
->vnodeNumber
, vnode
.uniquifier
, vnode
.dataVersion
+=
3018 osi_Assert(VALID_INO(newinode
));
3019 osi_Assert(CopyInode(salvinfo
->fileSysDevice
, oldinode
, newinode
, dir
->rwVid
) == 0);
3021 VNDISK_SET_INO(&vnode
, newinode
);
3023 IH_IWRITE(salvinfo
->vnodeInfo
[vLarge
].handle
,
3024 vnodeIndexOffset(vcp
, dir
->vnodeNumber
), (char *)&vnode
,
3026 osi_Assert(code
== sizeof(vnode
));
3028 SetSalvageDirHandle(&dir
->dirHandle
, dir
->dirHandle
.dirh_handle
->ih_vid
,
3029 salvinfo
->fileSysDevice
, newinode
,
3030 &salvinfo
->VolumeChanged
);
3031 /* Don't delete the original inode right away, because the directory is
3032 * still being scanned.
3038 * This function should either successfully create a new dir, or give up
3039 * and leave things the way they were. In particular, if it fails to write
3040 * the new dir properly, it should return w/o changing the reference to the
/*
 * CopyAndSalvage -- rebuild directory "dir" into a newly created inode
 * and switch the directory over to it only if the rebuild checks out.
 *
 * Visible behaviour:
 *  - the directory's vnode is read from the large vnode index and a new
 *    inode is created with IH_CREATE() (vnode.dataVersion is bumped in
 *    the argument list);
 *  - parentUnique starts at 1 and is replaced by the parent vnode's
 *    unique value when vnode.parent is set, CheckVnodeNumber() finds it
 *    and that value is non-zero;
 *  - DirSalvage() builds the new directory from the old one; on the
 *    failure path the new inode's link count is dropped with IH_DEC()
 *    and the result is logged;
 *  - DirOK() verifies the new directory; if it fails the new inode is
 *    released and the message says the old version is restored;
 *  - otherwise the vnode gets the new inode number and length, is
 *    written back and synced, the old directory file is closed, the old
 *    inode's link count is dropped, and dir->dirHandle becomes the new
 *    handle.
 *
 * NOTE(review): the return type, braces, the declarations of newdir,
 * fdP, code, lcode and length, several assignment targets and every
 * return statement are on lines missing from this listing.
 */
3044 CopyAndSalvage(struct SalvInfo
*salvinfo
, struct DirSummary
*dir
)
3046 struct VnodeDiskObject vnode
;
3047 struct VnodeClassInfo
*vcp
= &VnodeClassInfo
[vLarge
];
3048 Inode oldinode
, newinode
;
/* Fallback uniquifier for ".." when the parent cannot be looked up. */
3053 afs_int32 parentUnique
= 1;
3054 struct VnodeEssence
*vnodeEssence
;
3059 Log("Salvaging directory %u...\n", dir
->vnodeNumber
);
3061 IH_IREAD(salvinfo
->vnodeInfo
[vLarge
].handle
,
3062 vnodeIndexOffset(vcp
, dir
->vnodeNumber
), (char *)&vnode
,
3064 osi_Assert(lcode
== sizeof(vnode
));
3065 oldinode
= VNDISK_GET_INO(&vnode
);
3066 /* Increment the version number by a whole lot to avoid problems with
3067 * clients that were promised new version numbers--but the file server
3068 * crashed before the versions were written to disk.
3071 IH_CREATE(dir
->ds_linkH
, salvinfo
->fileSysDevice
, salvinfo
->fileSysPath
, 0, dir
->rwVid
,
3072 dir
->vnodeNumber
, vnode
.uniquifier
, vnode
.dataVersion
+=
3074 osi_Assert(VALID_INO(newinode
));
3075 SetSalvageDirHandle(&newdir
, dir
->rwVid
, salvinfo
->fileSysDevice
, newinode
,
3076 &salvinfo
->VolumeChanged
);
3078 /* Assign . and .. vnode numbers from dir and vnode.parent.
3079 * The uniquifier for . is in the vnode.
3080 * The uniquifier for .. might be set to a bogus value of 1 and
3081 * the salvager will later clean it up.
3083 if (vnode
.parent
&& (vnodeEssence
= CheckVnodeNumber(salvinfo
, vnode
.parent
))) {
3084 parentUnique
= (vnodeEssence
->unique
? vnodeEssence
->unique
: 1);
3087 DirSalvage(&dir
->dirHandle
, &newdir
, dir
->vnodeNumber
,
3089 (vnode
.parent
? vnode
.parent
: dir
->vnodeNumber
),
3094 /* didn't really build the new directory properly, let's just give up. */
3095 code
= IH_DEC(dir
->ds_linkH
, newinode
, dir
->rwVid
);
3096 Log("Directory salvage returned code %d, continuing.\n", code
);
3098 Log("also failed to decrement link count on new inode");
3102 Log("Checking the results of the directory salvage...\n");
3103 if (!DirOK(&newdir
)) {
3104 Log("Directory salvage failed!!!; restoring old version of the directory.\n");
3105 code
= IH_DEC(dir
->ds_linkH
, newinode
, dir
->rwVid
);
3106 osi_Assert(code
== 0);
/* New directory verified: point the vnode at it and record its length. */
3110 VNDISK_SET_INO(&vnode
, newinode
);
3111 length
= Length(&newdir
);
3112 VNDISK_SET_LEN(&vnode
, length
);
3114 IH_IWRITE(salvinfo
->vnodeInfo
[vLarge
].handle
,
3115 vnodeIndexOffset(vcp
, dir
->vnodeNumber
), (char *)&vnode
,
3117 osi_Assert(lcode
== sizeof(vnode
));
3118 IH_CONDSYNC(salvinfo
->vnodeInfo
[vLarge
].handle
);
3120 /* make sure old directory file is really closed */
3121 fdP
= IH_OPEN(dir
->dirHandle
.dirh_handle
);
3122 FDH_REALLYCLOSE(fdP
);
/* Release the old inode and adopt the rebuilt directory handle. */
3124 code
= IH_DEC(dir
->ds_linkH
, oldinode
, dir
->rwVid
);
3125 osi_Assert(code
== 0);
3126 dir
->dirHandle
= newdir
;
3130 * arguments for JudgeEntry.
/*
 * Argument bundle received by JudgeEntry() through its opaque "arock"
 * pointer.  NOTE(review): the closing brace of this struct is on a line
 * missing from this listing.
 */
3132 struct judgeEntry_params
{
3133 struct DirSummary
*dir
; /**< directory we're examining entries in */
3134 struct SalvInfo
*salvinfo
; /**< SalvInfo for the current salvage job */
/*
 * JudgeEntry -- examine one directory entry (name -> vnodeNumber.unique)
 * of the directory given in arock (a struct judgeEntry_params) and
 * repair or delete it as needed.
 *
 * Checks visible in this listing, in order:
 *  - vnodeNumber has no VnodeEssence (CheckVnodeNumber() returns NULL):
 *    logged as an invalid entry and deleted from the directory;
 *  - without AFS_NAMEI_ENV, an entry whose vnode has InodeNumber 0 is
 *    deleted;
 *  - an even vnode number whose essence has no count, unique or
 *    modeBits (and Showmode is off) is logged as invalid and deleted;
 *  - a uniquifier mismatch is either repaired by deleting and
 *    re-creating the entry with the vnode's unique, or the entry is
 *    deleted when the vnode's unique is 0 or the directory is orphaned;
 *  - "." must refer to the directory itself and ".." to dir->parent
 *    (or, on another visible path, to the directory itself); wrong
 *    entries are replaced;
 *  - names starting with ".__afs" are deleted and their vnode marked
 *    unclaimed / todelete;
 *  - otherwise: suid/sgid files and root-owned files are optionally
 *    reported; a symlink vnode without any 0111 mode bit is read as a
 *    mount point and, if its text does not start with '#' or '%' or
 *    does not end in '.', gets modeBits |= 0111; for large vnodes the
 *    entry name is remembered in vnodeEssence->name;
 *  - if the vnode's parent is not this directory, the directory either
 *    takes over as parent (vnode unclaimed, directory not orphaned,
 *    vnode is not 1) or the entry is deleted;
 *  - finally the vnode is marked claimed and its count is decremented.
 *
 * Every directory modification is preceded by CopyOnWrite().
 *
 * NOTE(review): the return type, one parameter line (the "unique"
 * argument used throughout), braces, several declarations, most
 * "if (!Testing)" / else / return lines and the final return are on
 * lines missing from this listing; the exact nesting of the fragments
 * below must be confirmed against the complete file.
 */
3138 JudgeEntry(void *arock
, char *name
, afs_int32 vnodeNumber
,
3141 struct judgeEntry_params
*params
= arock
;
3142 struct DirSummary
*dir
= params
->dir
;
3143 struct SalvInfo
*salvinfo
= params
->salvinfo
;
3144 struct VnodeEssence
*vnodeEssence
;
3145 afs_int32 dirOrphaned
, todelete
;
3147 dirOrphaned
= IsVnodeOrphaned(salvinfo
, dir
->vnodeNumber
);
/* An entry whose vnode number is outside the known vnodes is removed. */
3149 vnodeEssence
= CheckVnodeNumber(salvinfo
, vnodeNumber
);
3150 if (vnodeEssence
== NULL
) {
3152 Log("dir vnode %u: invalid entry deleted: %s" OS_DIRSEP
"%s (vnode %u, unique %u)\n", dir
->vnodeNumber
, dir
->name
? dir
->name
: "??", name
, vnodeNumber
, unique
);
3155 CopyOnWrite(salvinfo
, dir
);
3156 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3161 #ifndef AFS_NAMEI_ENV
3162 /* On AIX machines, don't allow entries to point to inode 0. That is a special
3163 * mount inode for the partition. If this inode were deleted, it would crash
3166 if (vnodeEssence
->InodeNumber
== 0) {
3167 Log("dir vnode %d: invalid entry: %s" OS_DIRSEP
"%s has no inode (vnode %d, unique %d)%s\n", dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeNumber
, unique
, (Testing
? "-- would have deleted" : " -- deleted"));
3169 CopyOnWrite(salvinfo
, dir
);
3170 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3177 if (!(vnodeNumber
& 1) && !Showmode
3178 && !(vnodeEssence
->count
|| vnodeEssence
->unique
3179 || vnodeEssence
->modeBits
)) {
3180 Log("dir vnode %u: invalid entry: %s" OS_DIRSEP
"%s (vnode %u, unique %u)%s\n",
3181 dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
,
3182 vnodeNumber
, unique
,
3183 ((!unique
) ? (Testing
? "-- would have deleted" : " -- deleted") :
3187 CopyOnWrite(salvinfo
, dir
);
3188 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3194 /* Check if the Uniquifiers match. If not, change the directory entry
3195 * so its unique matches the vnode unique. Delete if the unique is zero
3196 * or if the directory is orphaned.
3198 if (!vnodeEssence
->unique
|| (vnodeEssence
->unique
) != unique
) {
3199 todelete
= ((!vnodeEssence
->unique
|| dirOrphaned
) ? 1 : 0);
3202 && ((strcmp(name
, "..") == 0) || (strcmp(name
, ".") == 0))) {
3204 /* This is an orphaned directory. Don't delete the . or ..
3205 * entry. Otherwise, it will get created in the next
3206 * salvage and deleted again here. So Just skip it.
3210 /* (vnodeEssence->unique == 0 && ('.' || '..'));
3211 * Entries arriving here should be deleted, but the directory
3212 * is not orphaned. Therefore, the entry must be pointing at
3213 * the wrong vnode. Skip the 'else' clause and fall through;
3214 * the code below will repair the entry so it correctly points
3215 * at the vnode of the current directory (if '.') or the parent
3216 * directory (if '..'). */
3219 Log("dir vnode %u: %s" OS_DIRSEP
"%s (vnode %u): unique changed from %u to %u %s\n",
3220 dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeNumber
, unique
,
3221 vnodeEssence
->unique
, (!todelete
? "" : (Testing
? "-- would have deleted" : "-- deleted")));
/* Replace the entry: delete it, then re-create it with the vnode's unique. */
3225 fid
.Vnode
= vnodeNumber
;
3226 fid
.Unique
= vnodeEssence
->unique
;
3227 CopyOnWrite(salvinfo
, dir
);
3228 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3230 osi_Assert(Create(&dir
->dirHandle
, name
, &fid
) == 0);
3233 return 0; /* no need to continue */
/* "." must name this directory itself. */
3237 if (strcmp(name
, ".") == 0) {
3238 if (dir
->vnodeNumber
!= vnodeNumber
|| (dir
->unique
!= unique
)) {
3241 Log("directory vnode %u.%u: bad '.' entry (was %u.%u); fixed\n", dir
->vnodeNumber
, dir
->unique
, vnodeNumber
, unique
);
3243 CopyOnWrite(salvinfo
, dir
);
3244 osi_Assert(Delete(&dir
->dirHandle
, ".") == 0);
3245 fid
.Vnode
= dir
->vnodeNumber
;
3246 fid
.Unique
= dir
->unique
;
3247 osi_Assert(Create(&dir
->dirHandle
, ".", &fid
) == 0);
3250 vnodeNumber
= fid
.Vnode
; /* Get the new Essence */
3251 unique
= fid
.Unique
;
3252 vnodeEssence
= CheckVnodeNumber(salvinfo
, vnodeNumber
);
/* ".." is compared against the fid assembled in pa below. */
3255 } else if (strcmp(name
, "..") == 0) {
3258 struct VnodeEssence
*dotdot
;
3259 pa
.Vnode
= dir
->parent
;
3260 dotdot
= CheckVnodeNumber(salvinfo
, pa
.Vnode
);
3261 osi_Assert(dotdot
!= NULL
); /* XXX Should not be assert */
3262 pa
.Unique
= dotdot
->unique
;
3264 pa
.Vnode
= dir
->vnodeNumber
;
3265 pa
.Unique
= dir
->unique
;
3267 if ((pa
.Vnode
!= vnodeNumber
) || (pa
.Unique
!= unique
)) {
3269 Log("directory vnode %u.%u: bad '..' entry (was %u.%u); fixed\n", dir
->vnodeNumber
, dir
->unique
, vnodeNumber
, unique
);
3271 CopyOnWrite(salvinfo
, dir
);
3272 osi_Assert(Delete(&dir
->dirHandle
, "..") == 0);
3273 osi_Assert(Create(&dir
->dirHandle
, "..", &pa
) == 0);
3276 vnodeNumber
= pa
.Vnode
; /* Get the new Essence */
3278 vnodeEssence
= CheckVnodeNumber(salvinfo
, vnodeNumber
);
3280 dir
->haveDotDot
= 1;
/* Names with the ".__afs" prefix are removed and their vnode is
 * flagged for later deletion. */
3281 } else if (strncmp(name
, ".__afs", 6) == 0) {
3283 Log("dir vnode %u: special old unlink-while-referenced file %s %s deleted (vnode %u)\n", dir
->vnodeNumber
, name
, (Testing
? "would have been" : "is"), vnodeNumber
);
3286 CopyOnWrite(salvinfo
, dir
);
3287 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3289 vnodeEssence
->claimed
= 0; /* Not claimed: Orphaned */
3290 vnodeEssence
->todelete
= 1; /* Will later delete vnode and decr inode */
3293 if (ShowSuid
&& (vnodeEssence
->modeBits
& 06000))
3294 Log("FOUND suid/sgid file: %s" OS_DIRSEP
"%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir
->name
? dir
->name
: "??", name
, vnodeEssence
->owner
, vnodeEssence
->group
, vnodeEssence
->modeBits
, vnodeEssence
->author
, vnodeNumber
, dir
->vnodeNumber
);
/* A symlink vnode with none of the 0111 mode bits set is examined as a
 * mount point: its contents are read and validated below. */
3295 if (/* ShowMounts && */ (vnodeEssence
->type
== vSymlink
)
3296 && !(vnodeEssence
->modeBits
& 0111)) {
3297 afs_sfsize_t nBytes
;
3303 IH_INIT(ihP
, salvinfo
->fileSysDevice
, dir
->dirHandle
.dirh_handle
->ih_vid
,
3304 vnodeEssence
->InodeNumber
);
3307 Log("ERROR %s could not open mount point vnode %u\n", dir
->vname
, vnodeNumber
);
3311 size
= FDH_SIZE(fdP
);
3313 Log("ERROR %s mount point has invalid size %d, vnode %u\n", dir
->vname
, (int)size
, vnodeNumber
);
3314 FDH_REALLYCLOSE(fdP
);
3321 nBytes
= FDH_PREAD(fdP
, buf
, size
, 0);
3322 if (nBytes
== size
) {
/* Accepted mount point text starts with '#' or '%' and ends with '.';
 * anything else gets the 0111 bits set (reported as converting it to a
 * symbolic link). */
3324 if ( (*buf
!= '#' && *buf
!= '%') || buf
[strlen(buf
)-1] != '.' ) {
3325 Log("Volume %u (%s) mount point %s" OS_DIRSEP
"%s to '%s' invalid, %s to symbolic link\n",
3326 dir
->dirHandle
.dirh_handle
->ih_vid
, dir
->vname
, dir
->name
? dir
->name
: "??", name
, buf
,
3327 Testing
? "would convert" : "converted");
3328 vnodeEssence
->modeBits
|= 0111;
3329 vnodeEssence
->changed
= 1;
3330 } else if (ShowMounts
) Log("In volume %u (%s) found mountpoint %s" OS_DIRSEP
"%s to '%s'\n",
3331 dir
->dirHandle
.dirh_handle
->ih_vid
, dir
->vname
,
3332 dir
->name
? dir
->name
: "??", name
, buf
);
3334 Log("Volume %s cound not read mount point vnode %u size %d code %d\n",
3335 dir
->vname
, vnodeNumber
, (int)size
, (int)nBytes
);
3337 FDH_REALLYCLOSE(fdP
);
3340 if (ShowRootFiles
&& vnodeEssence
->owner
== 0 && vnodeNumber
!= 1)
3341 Log("FOUND root file: %s" OS_DIRSEP
"%s (%u.%u %05o) author %u (vnode %u dir %u)\n", dir
->name
? dir
->name
: "??", name
, vnodeEssence
->owner
, vnodeEssence
->group
, vnodeEssence
->modeBits
, vnodeEssence
->author
, vnodeNumber
, dir
->vnodeNumber
);
/* Remember the entry name for large-class vnodes that have none yet. */
3342 if (vnodeIdToClass(vnodeNumber
) == vLarge
3343 && vnodeEssence
->name
== NULL
) {
3345 if ((n
= (char *)malloc(strlen(name
) + 1)))
3347 vnodeEssence
->name
= n
;
3350 /* The directory entry points to the vnode. Check to see if the
3351 * vnode points back to the directory. If not, then let the
3352 * directory claim it (else it might end up orphaned). Vnodes
3353 * already claimed by another directory are deleted from this
3354 * directory: hardlinks to the same vnode are not allowed
3355 * from different directories.
3357 if (vnodeEssence
->parent
!= dir
->vnodeNumber
) {
3358 if (!vnodeEssence
->claimed
&& !dirOrphaned
&& vnodeNumber
!= 1) {
3359 /* Vnode does not point back to this directory.
3360 * Orphaned dirs cannot claim a file (it may belong to
3361 * another non-orphaned dir).
3364 Log("dir vnode %u: %s" OS_DIRSEP
"%s (vnode %u, unique %u) -- parent vnode %schanged from %u to %u\n", dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeNumber
, unique
, (Testing
? "would have been " : ""), vnodeEssence
->parent
, dir
->vnodeNumber
);
3366 vnodeEssence
->parent
= dir
->vnodeNumber
;
3367 vnodeEssence
->changed
= 1;
3369 /* Vnode was claimed by another directory */
3372 Log("dir vnode %u: %s" OS_DIRSEP
"%s parent vnode is %u (vnode %u, unique %u) -- %sdeleted\n", dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeEssence
->parent
, vnodeNumber
, unique
, (Testing
? "would have been " : ""));
3373 } else if (vnodeNumber
== 1) {
3374 Log("dir vnode %d: %s" OS_DIRSEP
"%s is invalid (vnode %d, unique %d) -- %sdeleted\n", dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeNumber
, unique
, (Testing
? "would have been " : ""));
3376 Log("dir vnode %u: %s" OS_DIRSEP
"%s already claimed by directory vnode %u (vnode %u, unique %u) -- %sdeleted\n", dir
->vnodeNumber
, (dir
->name
? dir
->name
: "??"), name
, vnodeEssence
->parent
, vnodeNumber
, unique
, (Testing
? "would have been " : ""));
3380 CopyOnWrite(salvinfo
, dir
);
3381 osi_Assert(Delete(&dir
->dirHandle
, name
) == 0);
3386 /* This directory claims the vnode */
3387 vnodeEssence
->claimed
= 1;
/* One expected reference to this vnode has now been accounted for. */
3389 vnodeEssence
->count
--;
// DistilVnodeEssence: read the on-disk vnode index for one vnode class and
// distil every in-use vnode into an in-memory VnodeEssence record kept in
// salvinfo->vnodeInfo[class].
//   salvinfo - salvage state; vnodeInfo[class] is filled in here
//   rwVId    - volume id handed to IH_INIT for the index inode
//   class    - vnode class being loaded
//   ino      - inode of the vnode index file for that class
//   maxu     - in/out: raised to the largest uniquifier found
// Open, size, seek and allocation failures are all checked with osi_Assert.
// NOTE(review): this extract is missing several original lines of the
// function (return type, braces, some declarations and branches).
3394 DistilVnodeEssence(struct SalvInfo
*salvinfo
, VolumeId rwVId
,
3395 VnodeClass
class, Inode ino
, Unique
* maxu
)
3397 struct VnodeInfo
*vip
= &salvinfo
->vnodeInfo
[class];
3398 struct VnodeClassInfo
*vcp
= &VnodeClassInfo
[class];
// buf is sized for the large disk vnode and is viewed through the vnode pointer.
3399 char buf
[SIZEOF_LARGEDISKVNODE
];
3400 struct VnodeDiskObject
*vnode
= (struct VnodeDiskObject
*)buf
;
3402 StreamHandle_t
*file
;
// Open the index inode as a descriptor and wrap it in a stream for reading.
3407 IH_INIT(vip
->handle
, salvinfo
->fileSysDevice
, rwVId
, ino
);
3408 fdP
= IH_OPEN(vip
->handle
);
3409 osi_Assert(fdP
!= NULL
);
3410 file
= FDH_FDOPEN(fdP
, "r+");
3411 osi_Assert(file
!= NULL
);
3412 size
= OS_SIZE(fdP
->fd_fd
);
3413 osi_Assert(size
!= -1);
// One diskSize-sized slot at the start of the file is not counted as a vnode.
3414 vip
->nVnodes
= (size
/ vcp
->diskSize
) - 1;
3415 if (vip
->nVnodes
> 0) {
// The file must be an exact multiple of diskSize; reading starts after the first slot.
3416 osi_Assert((vip
->nVnodes
+ 1) * vcp
->diskSize
== size
);
3417 osi_Assert(STREAM_ASEEK(file
, vcp
->diskSize
) == 0);
3418 osi_Assert((vip
->vnodes
= (struct VnodeEssence
*)
3419 calloc(vip
->nVnodes
, sizeof(struct VnodeEssence
))) != NULL
);
// Only the large class gets a parallel array of inode numbers.
3420 if (class == vLarge
) {
3421 osi_Assert((vip
->inodes
= (Inode
*)
3422 calloc(vip
->nVnodes
, sizeof(Inode
))) != NULL
);
3431 vip
->volumeBlockCount
= vip
->nAllocatedVnodes
= 0;
// Walk the vnode slots in order; the loop also stops at the first short read.
3432 for (vnodeIndex
= 0, nVnodes
= vip
->nVnodes
;
3433 nVnodes
&& STREAM_READ(vnode
, vcp
->diskSize
, 1, file
) == 1;
3434 nVnodes
--, vnodeIndex
++) {
3435 if (vnode
->type
!= vNull
) {
3436 struct VnodeEssence
*vep
= &vip
->vnodes
[vnodeIndex
];
3437 afs_fsize_t vnodeLength
;
3438 vip
->nAllocatedVnodes
++;
// count starts as the stored link count.
3439 vep
->count
= vnode
->linkCount
;
3440 VNDISK_GET_LEN(vnodeLength
, vnode
);
3441 vep
->blockCount
= nBlocks(vnodeLength
);
3442 vip
->volumeBlockCount
+= vep
->blockCount
;
3443 vep
->parent
= vnode
->parent
;
3444 vep
->unique
= vnode
->uniquifier
;
// Track the largest uniquifier seen across all vnodes for the caller.
3445 if (*maxu
< vnode
->uniquifier
)
3446 *maxu
= vnode
->uniquifier
;
3447 vep
->modeBits
= vnode
->modeBits
;
3448 vep
->InodeNumber
= VNDISK_GET_INO(vnode
);
3449 vep
->type
= vnode
->type
;
3450 vep
->author
= vnode
->author
;
3451 vep
->owner
= vnode
->owner
;
3452 vep
->group
= vnode
->group
;
3453 if (vnode
->type
== vDirectory
) {
// A directory vnode found outside the large class is not counted as
// allocated; its disk record is zeroed and the volume is marked changed.
3454 if (class != vLarge
) {
3455 VnodeId vnodeNumber
= bitNumberToVnodeNumber(vnodeIndex
, class);
3456 vip
->nAllocatedVnodes
--;
3457 memset(vnode
, 0, sizeof(*vnode
));
// NOTE(review): vnode is a pointer into buf, so the &vnode and sizeof(vnode)
// arguments below describe the pointer variable itself rather than the zeroed
// record; this looks like it should pass vnode and the record size. The
// result of the write is also not checked. Confirm and fix.
3458 IH_IWRITE(salvinfo
->vnodeInfo
[vSmall
].handle
,
3459 vnodeIndexOffset(vcp
, vnodeNumber
),
3460 (char *)&vnode
, sizeof(vnode
));
3461 salvinfo
->VolumeChanged
= 1;
// Presumably the large-class branch (the line introducing it is not visible
// in this extract): remember the inode that holds the directory contents.
3463 vip
->inodes
[vnodeIndex
] = VNDISK_GET_INO(vnode
);
// GetDirName: build a path for the directory vnode described by vp by first
// recursing on its parent and then appending a separator and vp->name.
// The append only happens when vp has a parent and a name, the parent vnode
// can be looked up with CheckVnodeNumber, and the recursive call on the
// parent returns non-zero.
// NOTE(review): the end of the parameter list and the remaining branches are
// not visible in this extract.
3472 GetDirName(struct SalvInfo
*salvinfo
, VnodeId vnode
, struct VnodeEssence
*vp
,
3475 struct VnodeEssence
*parentvp
;
3481 if (vp
->parent
&& vp
->name
&& (parentvp
= CheckVnodeNumber(salvinfo
, vp
->parent
))
3482 && GetDirName(salvinfo
, vp
->parent
, parentvp
, path
)) {
// NOTE(review): both strcat calls append without any visible length check on
// path; a deep or long-named tree could overrun the caller buffer. Confirm
// the buffer size contract with the caller.
3483 strcat(path
, OS_DIRSEP
);
3484 strcat(path
, vp
->name
);
3490 /* To determine if a vnode is orphaned or not, the vnode and all its parent
3491 * vnodes must be "claimed". The vep->claimed flag is set in JudgeEntry().
// IsVnodeOrphaned: report whether a vnode is orphaned.
// Returns 1 when the vnode is orphaned and 0 when it is not. A vnode whose
// essence cannot be found, or whose claimed flag is clear, is orphaned;
// otherwise the answer is whatever the same test gives for its parent, so
// the check walks up the parent chain recursively.
// NOTE(review): the conditions guarding the first two returns are not
// visible in this extract; their comments say they cover vnode zero and the
// root directory vnode.
3494 IsVnodeOrphaned(struct SalvInfo
*salvinfo
, VnodeId vnode
)
3496 struct VnodeEssence
*vep
;
3499 return (1); /* Vnode zero does not exist */
3501 return (0); /* The root dir vnode is always claimed */
3502 vep
= CheckVnodeNumber(salvinfo
, vnode
); /* Get the vnode essence */
3503 if (!vep
|| !vep
->claimed
)
3504 return (1); /* Vnode is not claimed - it is orphaned */
// Claimed here, so the result depends on the parent chain.
3506 return (IsVnodeOrphaned(salvinfo
, vep
->parent
));
// SalvageDir: check the directory stored in large-vnode slot i, rebuild it
// if it is bad, and then judge every entry in it with JudgeEntry.
//   salvinfo     - salvage state for the current job
//   name         - passed through unchanged to the recursive call
//   rwVid        - read-write volume id, used when releasing a replaced inode
//   dirVnodeInfo - vnode info holding the vnodes and inodes arrays indexed by i
//   alinkH       - link table handle, stored in dir.ds_linkH
//   i            - index of the directory vnode in dirVnodeInfo
//   rootdir      - out: receives a copy of the DirSummary for vnode 1, unique 1
//   rootdirfound - NOTE(review): not written in the lines visible here
// NOTE(review): dir, dirHandle and path are function-static while the routine
// also calls itself; in the visible lines the recursive call happens before
// dir is filled in, but confirm no state is expected to survive the call.
3510 SalvageDir(struct SalvInfo
*salvinfo
, char *name
, VolumeId rwVid
,
3511 struct VnodeInfo
*dirVnodeInfo
, IHandle_t
* alinkH
, int i
,
3512 struct DirSummary
*rootdir
, int *rootdirfound
)
3514 static struct DirSummary dir
;
3515 static struct DirHandle dirHandle
;
3516 struct VnodeEssence
*parent
;
3517 static char path
[MAXPATHLEN
];
3520 if (dirVnodeInfo
->vnodes
[i
].salvaged
)
3521 return; /* already salvaged */
// Mark first, so the recursion on the parent below cannot loop back here.
3524 dirVnodeInfo
->vnodes
[i
].salvaged
= 1;
3526 if (dirVnodeInfo
->inodes
[i
] == 0)
3527 return; /* Not allocated to a directory */
// Vnode 1 must have no parent; any recorded parent is cleared.
3529 if (bitNumberToVnodeNumber(i
, vLarge
) == 1) {
3530 if (dirVnodeInfo
->vnodes
[i
].parent
) {
3531 Log("Bad parent, vnode 1; %s...\n",
3532 (Testing
? "skipping" : "salvaging"));
3533 dirVnodeInfo
->vnodes
[i
].parent
= 0;
3534 dirVnodeInfo
->vnodes
[i
].changed
= 1;
// For other directories, salvage the parent directory first if it has not
// been salvaged yet.
3537 parent
= CheckVnodeNumber(salvinfo
, dirVnodeInfo
->vnodes
[i
].parent
);
3538 if (parent
&& parent
->salvaged
== 0)
3539 SalvageDir(salvinfo
, name
, rwVid
, dirVnodeInfo
, alinkH
,
3540 vnodeIdToBitNumber(dirVnodeInfo
->vnodes
[i
].parent
),
3541 rootdir
, rootdirfound
);
// Fill in the working DirSummary for this directory.
3544 dir
.vnodeNumber
= bitNumberToVnodeNumber(i
, vLarge
);
3545 dir
.unique
= dirVnodeInfo
->vnodes
[i
].unique
;
3548 dir
.parent
= dirVnodeInfo
->vnodes
[i
].parent
;
3549 dir
.haveDot
= dir
.haveDotDot
= 0;
3550 dir
.ds_linkH
= alinkH
;
3551 SetSalvageDirHandle(&dir
.dirHandle
, dir
.rwVid
, salvinfo
->fileSysDevice
,
3552 dirVnodeInfo
->inodes
[i
], &salvinfo
->VolumeChanged
);
// When RebuildDirs is set and Testing is not, the directory is treated as
// bad without running DirOK.
3554 dirok
= ((RebuildDirs
&& !Testing
) ? 0 : DirOK(&dir
.dirHandle
));
3557 Log("Directory bad, vnode %u; %s...\n", dir
.vnodeNumber
,
3558 (Testing
? "skipping" : "salvaging"));
3561 CopyAndSalvage(salvinfo
, &dir
);
3563 dirVnodeInfo
->inodes
[i
] = dir
.dirHandle
.dirh_inode
;
// Keep a copy of the handle as it is before enumeration; the release further
// down uses this copy.
3566 dirHandle
= dir
.dirHandle
;
3569 GetDirName(salvinfo
, bitNumberToVnodeNumber(i
, vLarge
),
3570 &dirVnodeInfo
->vnodes
[i
], path
);
3573 /* If enumeration failed for random reasons, we will probably delete
3574 * too much stuff, so we guard against this instead.
3576 struct judgeEntry_params judge_params
;
3577 judge_params
.salvinfo
= salvinfo
;
3578 judge_params
.dir
= &dir
;
3580 osi_Assert(EnumerateDir(&dirHandle
, JudgeEntry
, &judge_params
) == 0);
3583 /* Delete the old directory if it was copied in order to salvage.
3584 * CopyOnWrite has written the new inode # to the disk, but we still
3585 * have the old one in our local structure here. Thus, we idec the
3589 if (dir
.copied
&& !Testing
) {
3590 code
= IH_DEC(dir
.ds_linkH
, dirHandle
.dirh_handle
->ih_ino
, rwVid
);
3591 osi_Assert(code
== 0);
3592 dirVnodeInfo
->inodes
[i
] = dir
.dirHandle
.dirh_inode
;
3595 /* Remember rootdir DirSummary _after_ it has been judged */
3596 if (dir
.vnodeNumber
== 1 && dir
.unique
== 1) {
3597 memcpy(rootdir
, &dir
, sizeof(struct DirSummary
));
3605 * Get a new FID that can be used to create a new file.
3607 * @param[in] volHeader vol header for the volume
3608 * @param[in] class what type of vnode we'll be creating (vLarge or vSmall)
3609 * @param[out] afid the FID that we can use (only Vnode and Unique are set)
3610 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3611 * updated to the new max unique if we create a new
// GetNewFID: choose a vnode number and uniquifier for a vnode that is about
// to be created. The vnode slot is the first entry of the class whose type
// is vNull; if the scan reaches the end of the array, the array is grown by
// one entry. Only afid->Vnode and afid->Unique are written.
// NOTE(review): some lines of this function are not visible in this extract.
3615 GetNewFID(struct SalvInfo
*salvinfo
, VolumeDiskData
*volHeader
,
3616 VnodeClass
class, AFSFid
*afid
, Unique
*maxunique
)
// Look for a free (vNull) slot.
3619 for (i
= 0; i
< salvinfo
->vnodeInfo
[class].nVnodes
; i
++) {
3620 if (salvinfo
->vnodeInfo
[class].vnodes
[i
].type
== vNull
) {
3624 if (i
== salvinfo
->vnodeInfo
[class].nVnodes
) {
3625 /* no free vnodes; make a new one */
3626 salvinfo
->vnodeInfo
[class].nVnodes
++;
// NOTE(review): the realloc result is stored straight back into vnodes and no
// NULL check is visible before the new element is written, so an allocation
// failure would lose the old array and dereference NULL. Only the vnodes
// array is grown here; the parallel inodes array is left at its old size.
3627 salvinfo
->vnodeInfo
[class].vnodes
=
3628 realloc(salvinfo
->vnodeInfo
[class].vnodes
,
3629 sizeof(struct VnodeEssence
) * (i
+1));
3631 salvinfo
->vnodeInfo
[class].vnodes
[i
].type
= vNull
;
3634 afid
->Vnode
= bitNumberToVnodeNumber(i
, class);
// Pick the uniquifier: if the header value is not above the largest one in
// use, pick a value 2000 beyond it; otherwise consume the header value.
3636 if (volHeader
->uniquifier
< (*maxunique
+ 1)) {
3637 /* header uniq is bad; it will get bumped by 2000 later */
3638 afid
->Unique
= *maxunique
+ 1 + 2000;
3641 /* header uniq seems okay; just use that */
3642 afid
->Unique
= *maxunique
= volHeader
->uniquifier
++;
3647 * Create a vnode for a README file explaining not to use a recreated-root vol.
3649 * @param[in] volHeader vol header for the volume
3650 * @param[in] alinkH ihandle for i/o for the volume
3651 * @param[in] vid volume id
3652 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3653 * updated to the new max unique if we create a new
3655 * @param[out] afid FID for the new readme vnode
3656 * @param[out] ainode the inode for the new readme file
3658 * @return operation status
// CreateReadme: create a small-class file vnode holding a fixed explanatory
// text. The steps visible here are: obtain a FID with GetNewFID, create an
// inode and write the text into it, build a disk vnode describing the file
// and write it into the small vnode index, update the in-memory
// VnodeEssence and counters, and hand the inode number back through ainode.
// The trailing IH_DEC releases the new inode on the recovery path.
// NOTE(review): some lines of this function (end of the parameter list,
// declarations, branch and return lines) are not visible in this extract.
3663 CreateReadme(struct SalvInfo
*salvinfo
, VolumeDiskData
*volHeader
,
3664 IHandle_t
*alinkH
, VolumeId vid
, Unique
*maxunique
, AFSFid
*afid
,
3668 struct VnodeDiskObject
*rvnode
= NULL
;
3670 IHandle_t
*readmeH
= NULL
;
3671 struct VnodeEssence
*vep
;
3673 time_t now
= time(NULL
);
3675 /* Try to make the note brief, but informative. Only administrators should
3676 * be able to read this file at first, so we can hopefully assume they
3677 * know what AFS is, what a volume is, etc. */
3679 "This volume has been salvaged, but has lost its original root directory.\n"
3680 "The root directory that exists now has been recreated from orphan files\n"
3681 "from the rest of the volume. This recreated root directory may interfere\n"
3682 "with old cached data on clients, and there is no way the salvager can\n"
3683 "reasonably prevent that. So, it is recommended that you do not continue to\n"
3684 "use this volume, but only copy the salvaged data to a new volume.\n"
3685 "Continuing to use this volume as it exists now may cause some clients to\n"
3686 "behave oddly when accessing this volume.\n"
3687 "\n\t -- Your friendly neighborhood OpenAFS salvager\n";
3688 /* ^ the person reading this probably just lost some data, so they could
3689 * use some cheering up. */
3691 /* -1 for the trailing NUL */
3692 length
= sizeof(readme
) - 1;
// Reserve a small-class vnode slot and uniquifier for the file.
3694 GetNewFID(salvinfo
, volHeader
, vSmall
, afid
, maxunique
);
3696 vep
= &salvinfo
->vnodeInfo
[vSmall
].vnodes
[vnodeIdToBitNumber(afid
->Vnode
)];
3698 /* create the inode and write the contents */
3699 readmeinode
= IH_CREATE(alinkH
, salvinfo
->fileSysDevice
,
3700 salvinfo
->fileSysPath
, 0, vid
,
3701 afid
->Vnode
, afid
->Unique
, 1);
3702 if (!VALID_INO(readmeinode
)) {
3703 Log("CreateReadme: readme IH_CREATE failed\n");
3707 IH_INIT(readmeH
, salvinfo
->fileSysDevice
, vid
, readmeinode
);
3708 bytes
= IH_IWRITE(readmeH
, 0, readme
, length
);
3709 IH_RELEASE(readmeH
);
// NOTE(review): the comparison uses length, but the message below reports
// sizeof(readme) as the expected size, which is one byte larger.
3711 if (bytes
!= length
) {
3712 Log("CreateReadme: IWRITE failed (%d/%d)\n", (int)bytes
,
3713 (int)sizeof(readme
));
3717 /* create the vnode and write it out */
3718 rvnode
= calloc(1, SIZEOF_SMALLDISKVNODE
);
// NOTE(review): this message names CreateRootDir although it is emitted from
// CreateReadme; the prefix looks copied from the sibling function.
3720 Log("CreateRootDir: error alloc'ing memory\n");
3724 rvnode
->type
= vFile
;
3726 rvnode
->modeBits
= 0777;
3727 rvnode
->linkCount
= 1;
3728 VNDISK_SET_LEN(rvnode
, length
);
3729 rvnode
->uniquifier
= afid
->Unique
;
3730 rvnode
->dataVersion
= 1;
3731 VNDISK_SET_INO(rvnode
, readmeinode
);
3732 rvnode
->unixModifyTime
= rvnode
->serverModifyTime
= now
;
3737 rvnode
->vnodeMagic
= VnodeClassInfo
[vSmall
].magic
;
// Write the new disk vnode into its slot in the small vnode index.
3739 bytes
= IH_IWRITE(salvinfo
->vnodeInfo
[vSmall
].handle
,
3740 vnodeIndexOffset(&VnodeClassInfo
[vSmall
], afid
->Vnode
),
3741 (char*)rvnode
, SIZEOF_SMALLDISKVNODE
);
3743 if (bytes
!= SIZEOF_SMALLDISKVNODE
) {
3744 Log("CreateReadme: IH_IWRITE failed (%d/%d)\n", (int)bytes
,
3745 (int)SIZEOF_SMALLDISKVNODE
);
3749 /* update VnodeEssence for new readme vnode */
3750 salvinfo
->vnodeInfo
[vSmall
].nAllocatedVnodes
++;
3752 vep
->blockCount
= nBlocks(length
);
3753 salvinfo
->vnodeInfo
[vSmall
].volumeBlockCount
+= vep
->blockCount
;
3754 vep
->parent
= rvnode
->parent
;
3755 vep
->unique
= rvnode
->uniquifier
;
3756 vep
->modeBits
= rvnode
->modeBits
;
3757 vep
->InodeNumber
= VNDISK_GET_INO(rvnode
);
3758 vep
->type
= rvnode
->type
;
3759 vep
->author
= rvnode
->author
;
3760 vep
->owner
= rvnode
->owner
;
3761 vep
->group
= rvnode
->group
;
3771 *ainode
= readmeinode
;
// Recovery: give back the inode created above.
3776 if (IH_DEC(alinkH
, readmeinode
, vid
)) {
3777 Log("CreateReadme (recovery): IH_DEC failed\n");
3789 * create a root dir for a volume that lacks one.
3791 * @param[in] volHeader vol header for the volume
3792 * @param[in] alinkH ihandle for disk access for this volume group
3793 * @param[in] vid volume id we're dealing with
3794 * @param[out] rootdir populated with info about the new root dir
3795 * @param[inout] maxunique max uniquifier for all vnodes in the volume;
3796 * updated to the new max unique if we create a new
3799 * @return operation status
// CreateRootDir: build a replacement root directory (vnode 1, unique 1) for
// a volume that has none. The steps visible here are: refuse when the volume
// has no vnodes at all or when vnode 1 is already in use; make room for one
// large vnode if the large class is empty; create the readme file through
// CreateReadme; create the directory inode, initialise it with MakeDir and
// add a README.ROOTDIR entry; write a directory vnode with an ACL holding a
// single entry; then update the VnodeEssence and the caller DirSummary.
// The two IH_DEC calls at the end are the recovery path for the root and
// readme inodes, guarded by decroot and decreadme.
// NOTE(review): some lines of this function (end of the parameter list,
// declarations, branch and return lines) are not visible in this extract.
3804 CreateRootDir(struct SalvInfo
*salvinfo
, VolumeDiskData
*volHeader
,
3805 IHandle_t
*alinkH
, VolumeId vid
, struct DirSummary
*rootdir
,
3809 int decroot
= 0, decreadme
= 0;
3810 AFSFid did
, readmeid
;
3813 struct VnodeDiskObject
*rootvnode
= NULL
;
3814 struct acl_accessList
*ACL
;
3817 struct VnodeEssence
*vep
;
3818 Inode readmeinode
= 0;
3819 time_t now
= time(NULL
);
3821 if (!salvinfo
->vnodeInfo
[vLarge
].vnodes
&& !salvinfo
->vnodeInfo
[vSmall
].vnodes
) {
3822 Log("Not creating new root dir; volume appears to lack any vnodes\n");
3826 if (!salvinfo
->vnodeInfo
[vLarge
].vnodes
) {
3827 /* We don't have any large vnodes in the volume; allocate room
3828 * for one so we can recreate the root dir */
3829 salvinfo
->vnodeInfo
[vLarge
].nVnodes
= 1;
3830 salvinfo
->vnodeInfo
[vLarge
].vnodes
= calloc(1, sizeof(struct VnodeEssence
));
3831 salvinfo
->vnodeInfo
[vLarge
].inodes
= calloc(1, sizeof(Inode
));
3833 osi_Assert(salvinfo
->vnodeInfo
[vLarge
].vnodes
);
3834 osi_Assert(salvinfo
->vnodeInfo
[vLarge
].inodes
);
// vep and ip address the in-memory slot and inode slot of vnode 1.
3837 vep
= &salvinfo
->vnodeInfo
[vLarge
].vnodes
[vnodeIdToBitNumber(1)];
3838 ip
= &salvinfo
->vnodeInfo
[vLarge
].inodes
[vnodeIdToBitNumber(1)];
3839 if (vep
->type
!= vNull
) {
3840 Log("Not creating new root dir; existing vnode 1 is non-null\n");
3844 if (CreateReadme(salvinfo
, volHeader
, alinkH
, vid
, maxunique
, &readmeid
,
3845 &readmeinode
) != 0) {
3850 /* set the DV to a very high number, so it is unlikely that we collide
3851 * with a cached DV */
3854 rootinode
= IH_CREATE(alinkH
, salvinfo
->fileSysDevice
, salvinfo
->fileSysPath
,
3856 if (!VALID_INO(rootinode
)) {
3857 Log("CreateRootDir: IH_CREATE failed\n");
3862 SetSalvageDirHandle(&rootdir
->dirHandle
, vid
, salvinfo
->fileSysDevice
,
3863 rootinode
, &salvinfo
->VolumeChanged
);
// The same FID is passed for both FID arguments of MakeDir.
3867 if (MakeDir(&rootdir
->dirHandle
, (afs_int32
*)&did
, (afs_int32
*)&did
)) {
3868 Log("CreateRootDir: MakeDir failed\n");
3871 if (Create(&rootdir
->dirHandle
, "README.ROOTDIR", &readmeid
)) {
3872 Log("CreateRootDir: Create failed\n");
3876 length
= Length(&rootdir
->dirHandle
);
3877 DZap((void *)&rootdir
->dirHandle
);
3879 /* create the new root dir vnode */
3880 rootvnode
= calloc(1, SIZEOF_LARGEDISKVNODE
);
3882 Log("CreateRootDir: malloc failed\n");
3886 /* only give 'rl' permissions to 'system:administrators'. We do this to
3887 * try to catch the attention of an administrator, that they should not
3888 * be writing to this directory or continue to use it. */
3889 ACL
= VVnodeDiskACL(rootvnode
);
3890 ACL
->size
= sizeof(struct acl_accessList
);
3891 ACL
->version
= ACL_ACLVERSION
;
3895 ACL
->entries
[0].id
= -204; /* system:administrators */
3896 ACL
->entries
[0].rights
= PRSFS_READ
| PRSFS_LOOKUP
;
3898 rootvnode
->type
= vDirectory
;
3899 rootvnode
->cloned
= 0;
3900 rootvnode
->modeBits
= 0777;
3901 rootvnode
->linkCount
= 2;
3902 VNDISK_SET_LEN(rootvnode
, length
);
3903 rootvnode
->uniquifier
= 1;
3904 rootvnode
->dataVersion
= dv
;
3905 VNDISK_SET_INO(rootvnode
, rootinode
);
3906 rootvnode
->unixModifyTime
= rootvnode
->serverModifyTime
= now
;
3907 rootvnode
->author
= 0;
3908 rootvnode
->owner
= 0;
3909 rootvnode
->parent
= 0;
3910 rootvnode
->group
= 0;
3911 rootvnode
->vnodeMagic
= VnodeClassInfo
[vLarge
].magic
;
3913 /* write it out to disk */
3914 bytes
= IH_IWRITE(salvinfo
->vnodeInfo
[vLarge
].handle
,
3915 vnodeIndexOffset(&VnodeClassInfo
[vLarge
], 1),
3916 (char*)rootvnode
, SIZEOF_LARGEDISKVNODE
);
3918 if (bytes
!= SIZEOF_LARGEDISKVNODE
) {
3919 /* just cast to int and don't worry about printing real 64-bit ints;
3920 * a large disk vnode isn't anywhere near the 32-bit limit */
3921 Log("CreateRootDir: IH_IWRITE failed (%d/%d)\n", (int)bytes
,
3922 (int)SIZEOF_LARGEDISKVNODE
);
3926 /* update VnodeEssence for the new root vnode */
3927 salvinfo
->vnodeInfo
[vLarge
].nAllocatedVnodes
++;
3929 vep
->blockCount
= nBlocks(length
);
3930 salvinfo
->vnodeInfo
[vLarge
].volumeBlockCount
+= vep
->blockCount
;
3931 vep
->parent
= rootvnode
->parent
;
3932 vep
->unique
= rootvnode
->uniquifier
;
3933 vep
->modeBits
= rootvnode
->modeBits
;
3934 vep
->InodeNumber
= VNDISK_GET_INO(rootvnode
);
3935 vep
->type
= rootvnode
->type
;
3936 vep
->author
= rootvnode
->author
;
3937 vep
->owner
= rootvnode
->owner
;
3938 vep
->group
= rootvnode
->group
;
3948 /* update DirSummary for the new root vnode */
3949 rootdir
->vnodeNumber
= 1;
3950 rootdir
->unique
= 1;
3951 rootdir
->haveDot
= 1;
3952 rootdir
->haveDotDot
= 1;
3953 rootdir
->rwVid
= vid
;
3954 rootdir
->copied
= 0;
3955 rootdir
->parent
= 0;
// NOTE(review): the strdup result is stored without a visible NULL check.
3956 rootdir
->name
= strdup(".");
// vname points into the caller volume header, not at a private copy.
3957 rootdir
->vname
= volHeader
->name
;
3958 rootdir
->ds_linkH
= alinkH
;
// Recovery: release whichever of the two new inodes was flagged for release.
3965 if (decroot
&& IH_DEC(alinkH
, rootinode
, vid
)) {
3966 Log("CreateRootDir (recovery): IH_DEC (root) failed\n");
3968 if (decreadme
&& IH_DEC(alinkH
, readmeinode
, vid
)) {
3969 Log("CreateRootDir (recovery): IH_DEC (readme) failed\n");
3979 * salvage a volume group.
3981 * @param[in] salvinfo information for the current salvage job
3982 * @param[in] rwIsp inode summary for rw volume
3983 * @param[in] alinkH link table inode handle
3985 * @return operation status
// SalvageVolume: salvage the read-write volume described by rwIsp.
// Phases visible in this extract:
//   1. read and sanity-check the volume header;
//   2. load both vnode classes with DistilVnodeEssence;
//   3. salvage every directory vnode with SalvageDir;
//   4. if no root directory was found and orphans are to be attached (and
//      not Testing), try to create one with CreateRootDir;
//   5. walk all vnodes, and attach unclaimed ones to the root directory
//      under generated names when orphans == ORPH_ATTACH;
//   6. write back every vnode that changed or whose link count is off,
//      removing orphans when requested;
//   7. free names, fix the header statistics and uniquifier, notify the
//      fileserver, and write the header back.
// NOTE(review): many lines of this function (declarations, branch and return
// lines, some assignments) are not visible in this extract.
3989 SalvageVolume(struct SalvInfo
*salvinfo
, struct InodeSummary
*rwIsp
, IHandle_t
* alinkH
)
3991 /* This routine, for now, will only be called for read-write volumes */
3993 int BlocksInVolume
= 0, FilesInVolume
= 0;
3995 struct DirSummary rootdir
, oldrootdir
;
3996 struct VnodeInfo
*dirVnodeInfo
;
3997 struct VnodeDiskObject vnode
;
3998 VolumeDiskData volHeader
;
4000 int orphaned
, rootdirfound
= 0;
4001 Unique maxunique
= 0; /* the maxUniquifier from the vnodes */
4002 afs_int32 ofiles
= 0, oblocks
= 0; /* Number of orphaned files/blocks */
4003 struct VnodeEssence
*vep
;
4006 afs_sfsize_t nBytes
;
4008 VnodeId LFVnode
, ThisVnode
;
4009 Unique LFUnique
, ThisUnique
;
// Phase 1: read the volume header and assert that it is usable.
4013 vid
= rwIsp
->volSummary
->header
.id
;
4014 IH_INIT(h
, salvinfo
->fileSysDevice
, vid
, rwIsp
->volSummary
->header
.volumeInfo
);
4015 nBytes
= IH_IREAD(h
, 0, (char *)&volHeader
, sizeof(volHeader
));
4016 osi_Assert(nBytes
== sizeof(volHeader
));
4017 osi_Assert(volHeader
.stamp
.magic
== VOLUMEINFOMAGIC
);
4018 osi_Assert(volHeader
.destroyMe
!= DESTROY_ME
);
4019 /* (should not have gotten this far with DESTROY_ME flag still set!) */
// Phase 2: load both vnode indexes; maxunique collects the largest uniquifier.
4021 DistilVnodeEssence(salvinfo
, vid
, vLarge
,
4022 rwIsp
->volSummary
->header
.largeVnodeIndex
, &maxunique
);
4023 DistilVnodeEssence(salvinfo
, vid
, vSmall
,
4024 rwIsp
->volSummary
->header
.smallVnodeIndex
, &maxunique
);
// Phase 3: salvage every directory vnode.
4026 dirVnodeInfo
= &salvinfo
->vnodeInfo
[vLarge
];
4027 for (i
= 0; i
< dirVnodeInfo
->nVnodes
; i
++) {
4028 SalvageDir(salvinfo
, volHeader
.name
, vid
, dirVnodeInfo
, alinkH
, i
,
4029 &rootdir
, &rootdirfound
);
4032 nt_sync(salvinfo
->fileSysDevice
);
4034 sync(); /* This used to be done lower level, for every dir */
// Phase 4: no root directory found; try to create one when attaching orphans.
4041 if (!rootdirfound
&& (orphans
== ORPH_ATTACH
) && !Testing
) {
4043 Log("Cannot find root directory for volume %lu; attempting to create "
4044 "a new one\n", afs_printable_uint32_lu(vid
));
4046 code
= CreateRootDir(salvinfo
, &volHeader
, alinkH
, vid
, &rootdir
,
4051 salvinfo
->VolumeChanged
= 1;
4055 /* Parse each vnode looking for orphaned vnodes and
4056 * connect them to the tree as orphaned (if requested).
4058 oldrootdir
= rootdir
;
4059 for (class = 0; class < nVNODECLASSES
; class++) {
4060 for (v
= 0; v
< salvinfo
->vnodeInfo
[class].nVnodes
; v
++) {
4061 vep
= &(salvinfo
->vnodeInfo
[class].vnodes
[v
]);
4062 ThisVnode
= bitNumberToVnodeNumber(v
, class);
4063 ThisUnique
= vep
->unique
;
4065 if ((vep
->type
== 0) || vep
->claimed
|| ThisVnode
== 1)
4066 continue; /* Ignore unused, claimed, and root vnodes */
4068 /* This vnode is orphaned. If it is a directory vnode, then the '..'
4069 * entry in this vnode had incremented the parent link count (In
4070 * JudgeEntry()). We need to go to the parent and decrement that
4071 * link count. But if the parent's unique is zero, then the parent
4072 * link count was not incremented in JudgeEntry().
4074 if (class == vLarge
) { /* directory vnode */
// NOTE(review): pv is derived from the stored parent number and is used as an
// index into the large vnodes array without a range check in the visible
// lines; a corrupt parent value could index out of bounds. Confirm.
4075 pv
= vnodeIdToBitNumber(vep
->parent
);
4076 if (salvinfo
->vnodeInfo
[vLarge
].vnodes
[pv
].unique
!= 0) {
4077 if (vep
->parent
== 1 && newrootdir
) {
4078 /* this vnode's parent was the volume root, and
4079 * we just created the volume root. So, the parent
4080 * dir didn't exist during JudgeEntry, so the link
4081 * count was not inc'd there, so don't dec it here.
4087 salvinfo
->vnodeInfo
[vLarge
].vnodes
[pv
].count
++;
4093 continue; /* If no rootdir, can't attach orphaned files */
4095 /* Here we attach orphaned files and directories into the
4096 * root directory, LFVnode, making sure link counts stay correct.
4098 if ((orphans
== ORPH_ATTACH
) && !vep
->todelete
&& !Testing
) {
4099 LFVnode
= rootdir
.vnodeNumber
; /* Lost+Found vnode number */
4100 LFUnique
= rootdir
.unique
; /* Lost+Found uniquifier */
4102 /* Update this orphaned vnode's info. Its parent info and
4103 * link count (do for orphaned directories and files).
4105 vep
->parent
= LFVnode
; /* Parent is the root dir */
4106 vep
->unique
= LFUnique
;
// count is subtracted from the stored link count in the write-out loop
// further down, so lowering count here raises the final link count.
4109 vep
->count
--; /* Inc link count (root dir will pt to it) */
4111 /* If this orphaned vnode is a directory, change '..'.
4112 * The name of the orphaned dir/file is unknown, so we
4113 * build a unique name. No need to CopyOnWrite the directory
4114 * since it is not connected to tree in BK or RO volume and
4115 * won't be visible there.
4117 if (class == vLarge
) {
4121 /* Remove and recreate the ".." entry in this orphaned directory */
4122 SetSalvageDirHandle(&dh
, vid
, salvinfo
->fileSysDevice
,
4123 salvinfo
->vnodeInfo
[class].inodes
[v
],
4124 &salvinfo
->VolumeChanged
);
4126 pa
.Unique
= LFUnique
;
4127 osi_Assert(Delete(&dh
, "..") == 0);
4128 osi_Assert(Create(&dh
, "..", &pa
) == 0);
4130 /* The original parent's link count was decremented above.
4131 * Here we increment the new parent's link count.
4133 pv
= vnodeIdToBitNumber(LFVnode
);
4134 salvinfo
->vnodeInfo
[vLarge
].vnodes
[pv
].count
--;
4138 /* Go to the root dir and add this entry. The link count of the
4139 * root dir was incremented when ".." was created. Try 10 times.
4141 for (j
= 0; j
< 10; j
++) {
4142 pa
.Vnode
= ThisVnode
;
4143 pa
.Unique
= ThisUnique
;
4145 (void)afs_snprintf(npath
, sizeof npath
, "%s.%u.%u",
4147 vLarge
) ? "__ORPHANDIR__" :
4148 "__ORPHANFILE__"), ThisVnode
,
4151 CopyOnWrite(salvinfo
, &rootdir
);
4152 code
= Create(&rootdir
.dirHandle
, npath
, &pa
);
4156 ThisUnique
+= 50; /* Try creating a different file */
4158 osi_Assert(code
== 0);
4159 Log("Attaching orphaned %s to volume's root dir as %s\n",
4160 ((class == vLarge
) ? "directory" : "file"), npath
);
4162 } /* for each vnode in the class */
4163 } /* for each class of vnode */
4165 /* Delete the old rootinode directory if the rootdir was CopyOnWrite */
4167 if (rootdirfound
&& !oldrootdir
.copied
&& rootdir
.copied
) {
4169 IH_DEC(oldrootdir
.ds_linkH
, oldrootdir
.dirHandle
.dirh_inode
,
4171 osi_Assert(code
== 0);
4172 /* dirVnodeInfo->inodes[?] is not updated with new inode number */
4175 DFlush(); /* Flush the changes */
4176 if (!rootdirfound
&& (orphans
== ORPH_ATTACH
)) {
4177 Log("Cannot attach orphaned files and directories: Root directory not found\n");
4178 orphans
= ORPH_IGNORE
;
4181 /* Write out all changed vnodes. Orphaned files and directories
4182 * will get removed here also (if requested).
4184 for (class = 0; class < nVNODECLASSES
; class++) {
4185 afs_sfsize_t nVnodes
= salvinfo
->vnodeInfo
[class].nVnodes
;
4186 struct VnodeClassInfo
*vcp
= &VnodeClassInfo
[class];
4187 struct VnodeEssence
*vnodes
= salvinfo
->vnodeInfo
[class].vnodes
;
4188 FilesInVolume
+= salvinfo
->vnodeInfo
[class].nAllocatedVnodes
;
4189 BlocksInVolume
+= salvinfo
->vnodeInfo
[class].volumeBlockCount
;
4190 for (i
= 0; i
< nVnodes
; i
++) {
4191 struct VnodeEssence
*vnp
= &vnodes
[i
];
4192 VnodeId vnodeNumber
= bitNumberToVnodeNumber(i
, class);
4194 /* If the vnode is good but is unclaimed (not listed in
4195 * any directory entries), then it is orphaned.
4198 if ((vnp
->type
!= 0) && (orphaned
= IsVnodeOrphaned(salvinfo
, vnodeNumber
))) {
4199 vnp
->claimed
= 0; /* Makes IsVnodeOrphaned calls faster */
4203 if (vnp
->changed
|| vnp
->count
) {
4206 IH_IREAD(salvinfo
->vnodeInfo
[class].handle
,
4207 vnodeIndexOffset(vcp
, vnodeNumber
),
4208 (char *)&vnode
, sizeof(vnode
));
4209 osi_Assert(nBytes
== sizeof(vnode
));
4211 vnode
.parent
= vnp
->parent
;
4212 oldCount
= vnode
.linkCount
;
4213 vnode
.linkCount
= vnode
.linkCount
- vnp
->count
;
4216 orphaned
= IsVnodeOrphaned(salvinfo
, vnodeNumber
);
4218 if (!vnp
->todelete
) {
4219 /* Orphans should have already been attached (if requested) */
4220 osi_Assert(orphans
!= ORPH_ATTACH
);
4221 oblocks
+= vnp
->blockCount
;
4224 if (((orphans
== ORPH_REMOVE
) || vnp
->todelete
)
4226 BlocksInVolume
-= vnp
->blockCount
;
4228 if (VNDISK_GET_INO(&vnode
)) {
4230 IH_DEC(alinkH
, VNDISK_GET_INO(&vnode
), vid
);
4231 osi_Assert(code
== 0);
4233 memset(&vnode
, 0, sizeof(vnode
));
4235 } else if (vnp
->count
) {
4237 Log("Vnode %u: link count incorrect (was %d, %s %d)\n", vnodeNumber
, oldCount
, (Testing
? "would have changed to" : "now"), vnode
.linkCount
);
4240 vnode
.modeBits
= vnp
->modeBits
;
4243 vnode
.dataVersion
++;
4246 IH_IWRITE(salvinfo
->vnodeInfo
[class].handle
,
4247 vnodeIndexOffset(vcp
, vnodeNumber
),
4248 (char *)&vnode
, sizeof(vnode
));
4249 osi_Assert(nBytes
== sizeof(vnode
));
4251 salvinfo
->VolumeChanged
= 1;
4255 if (!Showmode
&& ofiles
) {
4256 Log("%s %d orphaned files and directories (approx. %u KB)\n",
4258 && (orphans
== ORPH_REMOVE
)) ? "Removed" : "Found", ofiles
,
// Release the per-vnode name strings held in the VnodeEssence records.
4262 for (class = 0; class < nVNODECLASSES
; class++) {
4263 struct VnodeInfo
*vip
= &salvinfo
->vnodeInfo
[class];
4264 for (i
= 0; i
< vip
->nVnodes
; i
++)
4265 if (vip
->vnodes
[i
].name
)
4266 free(vip
->vnodes
[i
].name
);
4273 /* Set correct resource utilization statistics */
4274 volHeader
.filecount
= FilesInVolume
;
4275 volHeader
.diskused
= BlocksInVolume
;
4277 /* Make sure the uniquifier is big enough: maxunique is the real maxUniquifier */
4278 if (volHeader
.uniquifier
< (maxunique
+ 1)) {
4280 Log("Volume uniquifier is too low; fixed\n");
4281 /* Plus 2,000 in case there are workstations out there with
4282 * cached vnodes that have since been deleted
4284 volHeader
.uniquifier
= (maxunique
+ 1 + 2000);
4288 Log("*** WARNING: Root directory recreated, but volume is fragile! "
4289 "Only use this salvaged volume to copy data to another volume; "
4290 "do not continue to use this volume (%lu) as-is.\n",
4291 afs_printable_uint32_lu(vid
));
4294 if (!Testing
&& salvinfo
->VolumeChanged
) {
4295 #ifdef FSSYNC_BUILD_CLIENT
4296 if (salvinfo
->useFSYNC
) {
4297 afs_int32 fsync_code
;
4299 fsync_code
= FSYNC_VolOp(vid
, NULL
, FSYNC_VOL_BREAKCBKS
, FSYNC_SALVAGE
, NULL
);
4301 Log("Error trying to tell the fileserver to break callbacks for "
4302 "changed volume %lu; error code %ld\n",
4303 afs_printable_uint32_lu(vid
),
4304 afs_printable_int32_ld(fsync_code
));
4306 salvinfo
->VolumeChanged
= 0;
4309 #endif /* FSSYNC_BUILD_CLIENT */
4311 #ifdef AFS_DEMAND_ATTACH_FS
4312 if (!salvinfo
->useFSYNC
) {
4313 /* A volume's contents have changed, but the fileserver will not
4314 * break callbacks on the volume until it tries to load the vol
4315 * header. So, to reduce the amount of time a client could have
4316 * stale data, remove fsstate.dat, so the fileserver will init
4317 * callback state with all clients. This is a very coarse hammer,
4318 * and in the future we should just record which volumes have
4320 code
= unlink(AFSDIR_SERVER_FSSTATE_FILEPATH
);
4321 if (code
&& errno
!= ENOENT
) {
4322 Log("Error %d when trying to unlink FS state file %s\n", errno
,
4323 AFSDIR_SERVER_FSSTATE_FILEPATH
);
4329 /* Turn off the inUse bit; the volume's been salvaged! */
4330 volHeader
.inUse
= 0; /* clear flag indicating inUse@last crash */
4331 volHeader
.needsSalvaged
= 0; /* clear 'damaged' flag */
4332 volHeader
.inService
= 1; /* allow service again */
4333 if (salvinfo
->VolumeChanged
) {
4334 volHeader
.needsCallback
= 1;
4335 volHeader
.updateDate
= time(NULL
);
4337 volHeader
.needsCallback
= 0;
4339 volHeader
.dontSalvage
= DONT_SALVAGE
;
4340 salvinfo
->VolumeChanged
= 0;
4342 nBytes
= IH_IWRITE(h
, 0, (char *)&volHeader
, sizeof(volHeader
));
4343 osi_Assert(nBytes
== sizeof(volHeader
));
4346 Log("%sSalvaged %s (%u): %d files, %d blocks\n",
4347 (Testing
? "It would have " : ""), volHeader
.name
, volHeader
.id
,
4348 FilesInVolume
, BlocksInVolume
);
// Drop the handles on both vnode index files.
4351 IH_RELEASE(salvinfo
->vnodeInfo
[vSmall
].handle
);
4352 IH_RELEASE(salvinfo
->vnodeInfo
[vLarge
].handle
);
// ClearROInUseBit: rewrite the volume header reached through
// summary->volumeInfoHandle so that the volume reads as cleanly salvaged:
// inUse and needsSalvaged are cleared, inService is set, and dontSalvage is
// set to DONT_SALVAGE. The header is read, checked against VOLUMEINFOMAGIC,
// modified in memory and written back in full; short reads or writes and a
// bad magic number are caught with osi_Assert.
// NOTE(review): some lines of this function are not visible in this extract.
4358 ClearROInUseBit(struct VolumeSummary
*summary
)
4360 IHandle_t
*h
= summary
->volumeInfoHandle
;
4361 afs_sfsize_t nBytes
;
4363 VolumeDiskData volHeader
;
4365 nBytes
= IH_IREAD(h
, 0, (char *)&volHeader
, sizeof(volHeader
));
4366 osi_Assert(nBytes
== sizeof(volHeader
));
4367 osi_Assert(volHeader
.stamp
.magic
== VOLUMEINFOMAGIC
);
4368 volHeader
.inUse
= 0;
4369 volHeader
.needsSalvaged
= 0;
4370 volHeader
.inService
= 1;
4371 volHeader
.dontSalvage
= DONT_SALVAGE
;
4373 nBytes
= IH_IWRITE(h
, 0, (char *)&volHeader
, sizeof(volHeader
));
4374 osi_Assert(nBytes
== sizeof(volHeader
));
4379 * Possibly delete the volume.
4381 * deleteMe - Always do so, only a partial volume.
// MaybeZapVolume: decide what to do with a volume whose salvage did not
// succeed. When the volume is read-only or deleteMe is set, and it has a
// volume summary not yet marked deleted, the reason is logged, the volume
// disk header is destroyed, the header file is unlinked, the fileserver is
// asked to delete the volume when FSYNC is in use, and the summary is marked
// deleted. Otherwise, when check is zero, the failure is logged and the
// salvage is aborted.
//   message  - prefix used in the failure log line
//   deleteMe - non-zero to force deletion regardless of volume type
//   check    - non-zero suppresses the log-and-abort branch
// NOTE(review): several branch lines of this function are not visible in
// this extract, so the exact conditions around each Log call cannot be
// confirmed from here.
4384 MaybeZapVolume(struct SalvInfo
*salvinfo
, struct InodeSummary
*isp
,
4385 char *message
, int deleteMe
, int check
)
4387 if (readOnly(isp
) || deleteMe
) {
4388 if (isp
->volSummary
&& !isp
->volSummary
->deleted
) {
4391 Log("Volume %u (is only a partial volume--probably an attempt was made to move/restore it when a machine crash occured.\n", isp
->volumeId
);
4393 Log("It will be deleted on this server (you may find it elsewhere)\n");
4396 Log("Volume %u needs to be salvaged. Since it is read-only, however,\n", isp
->volumeId
);
4398 Log("it will be deleted instead. It should be recloned.\n");
4403 char filename
[VMAXPATHLEN
];
4404 VolumeExternalName_r(isp
->volumeId
, filename
, sizeof(filename
));
// NOTE(review): sprintf writes into path with no visible bound; the
// declaration of path is not visible here, so its size cannot be confirmed.
4405 sprintf(path
, "%s" OS_DIRSEP
"%s", salvinfo
->fileSysPath
, filename
);
4407 code
= VDestroyVolumeDiskHeader(salvinfo
->fileSysPartition
, isp
->volumeId
, isp
->RWvolumeId
);
4409 Log("Error %ld destroying volume disk header for volume %lu\n",
4410 afs_printable_int32_ld(code
),
4411 afs_printable_uint32_lu(isp
->volumeId
));
4414 /* make sure we actually delete the header file; ENOENT
4415 * is fine, since VDestroyVolumeDiskHeader probably already
4417 if (unlink(path
) && errno
!= ENOENT
) {
4418 Log("Unable to unlink %s (errno = %d)\n", path
, errno
);
4420 if (salvinfo
->useFSYNC
) {
4421 AskDelete(salvinfo
, isp
->volumeId
);
4423 isp
->volSummary
->deleted
= 1;
4426 } else if (!check
) {
4427 Log("%s salvage was unsuccessful: read-write volume %u\n", message
,
4429 Abort("Salvage of volume %u aborted\n", isp
->volumeId
);
4433 #ifdef AFS_DEMAND_ATTACH_FS
4435 * Locks a volume on disk for salvaging.
4437 * @param[in] volumeId volume ID to lock
4439 * @return operation status
4441 * @retval -1 volume lock raced with a fileserver restart; all volumes must
4442 * checked out and locked again
4447 LockVolume(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4452 /* should always be WRITE_LOCK, but keep the lock-type logic all
4453 * in one place, in VVolLockType. Params will be ignored, but
4454 * try to provide what we're logically doing. */
4455 locktype
= VVolLockType(V_VOLUPD
, 1);
4457 code
= VLockVolumeByIdNB(volumeId
, salvinfo
->fileSysPartition
, locktype
);
4459 if (code
== EBUSY
) {
4460 Abort("Someone else appears to be using volume %lu; Aborted\n",
4461 afs_printable_uint32_lu(volumeId
));
4463 Abort("Error %ld trying to lock volume %lu; Aborted\n",
4464 afs_printable_int32_ld(code
),
4465 afs_printable_uint32_lu(volumeId
));
4468 code
= FSYNC_VerifyCheckout(volumeId
, salvinfo
->fileSysPartition
->name
, FSYNC_VOL_OFF
, FSYNC_SALVAGE
);
4469 if (code
== SYNC_DENIED
) {
4470 /* need to retry checking out volumes */
4473 if (code
!= SYNC_OK
) {
4474 Abort("FSYNC_VerifyCheckout failed for volume %lu with code %ld\n",
4475 afs_printable_uint32_lu(volumeId
), afs_printable_int32_ld(code
));
4478 /* set inUse = programType in the volume header to ensure that nobody
4479 * tries to use this volume again without salvaging, if we somehow crash
4480 * or otherwise exit before finishing the salvage.
4484 struct VolumeHeader header
;
4485 struct VolumeDiskHeader diskHeader
;
4486 struct VolumeDiskData volHeader
;
4488 code
= VReadVolumeDiskHeader(volumeId
, salvinfo
->fileSysPartition
, &diskHeader
);
4493 DiskToVolumeHeader(&header
, &diskHeader
);
4495 IH_INIT(h
, salvinfo
->fileSysDevice
, header
.parent
, header
.volumeInfo
);
4496 if (IH_IREAD(h
, 0, (char*)&volHeader
, sizeof(volHeader
)) != sizeof(volHeader
) ||
4497 volHeader
.stamp
.magic
!= VOLUMEINFOMAGIC
) {
4503 volHeader
.inUse
= programType
;
4505 /* If we can't re-write the header, bail out and error. We don't
4506 * assert when reading the header, since it's possible the
4507 * header isn't really there (when there's no data associated
4508 * with the volume; we just delete the vol header file in that
4509 * case). But if it's there enough that we can read it, but
4510 * somehow we cannot write to it to signify we're salvaging it,
4511 * we've got a big problem and we cannot continue. */
4512 osi_Assert(IH_IWRITE(h
, 0, (char*)&volHeader
, sizeof(volHeader
)) == sizeof(volHeader
));
4519 #endif /* AFS_DEMAND_ATTACH_FS */
4522 AskError(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4524 #if defined(AFS_DEMAND_ATTACH_FS) || defined(AFS_DEMAND_ATTACH_UTIL)
4526 code
= FSYNC_VolOp(volumeId
, salvinfo
->fileSysPartition
->name
,
4527 FSYNC_VOL_FORCE_ERROR
, FSYNC_WHATEVER
, NULL
);
4528 if (code
!= SYNC_OK
) {
4529 Log("AskError: failed to force volume %lu into error state; "
4530 "SYNC error code %ld (%s)\n", (long unsigned)volumeId
,
4531 (long)code
, SYNC_res2string(code
));
4533 #endif /* AFS_DEMAND_ATTACH_FS || AFS_DEMAND_ATTACH_UTIL */
4537 AskOffline(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4542 memset(&res
, 0, sizeof(res
));
4544 for (i
= 0; i
< 3; i
++) {
4545 code
= FSYNC_VolOp(volumeId
, salvinfo
->fileSysPartition
->name
,
4546 FSYNC_VOL_OFF
, FSYNC_SALVAGE
, &res
);
4548 if (code
== SYNC_OK
) {
4550 } else if (code
== SYNC_DENIED
) {
4552 Log("AskOffline: file server denied offline request; a general salvage may be required.\n");
4554 Log("AskOffline: file server denied offline request; a general salvage is required.\n");
4555 Abort("Salvage aborted\n");
4556 } else if (code
== SYNC_BAD_COMMAND
) {
4557 Log("AskOffline: fssync protocol mismatch (bad command word '%d'); salvage aborting.\n",
4560 #ifdef AFS_DEMAND_ATTACH_FS
4561 Log("AskOffline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4563 Log("AskOffline: fileserver is DAFS but we are not.\n");
4566 #ifdef AFS_DEMAND_ATTACH_FS
4567 Log("AskOffline: fileserver is not DAFS but we are.\n");
4569 Log("AskOffline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4572 Abort("Salvage aborted\n");
4575 Log("AskOffline: request for fileserver to take volume offline failed; trying again...\n");
4576 FSYNC_clientFinis();
4580 if (code
!= SYNC_OK
) {
4581 Log("AskOffline: request for fileserver to take volume offline failed; salvage aborting.\n");
4582 Abort("Salvage aborted\n");
4586 /* don't want to pass around state; remember it here */
4587 static int isDAFS
= -1;
4592 afs_int32 code
= 1, i
;
4594 /* we don't care if we race. the answer shouldn't change */
4598 memset(&res
, 0, sizeof(res
));
4600 for (i
= 0; code
&& i
< 3; i
++) {
4601 code
= FSYNC_VolOp(0, NULL
, FSYNC_VOL_LISTVOLUMES
, FSYNC_SALVAGE
, &res
);
4603 Log("AskDAFS: FSYNC_VOL_LISTVOLUMES failed with code %ld reason "
4604 "%ld (%s); trying again...\n", (long)code
, (long)res
.hdr
.reason
,
4605 FSYNC_reason2string(res
.hdr
.reason
));
4606 FSYNC_clientFinis();
4612 Log("AskDAFS: could not determine DAFS-ness, assuming not DAFS\n");
4616 if ((res
.hdr
.flags
& SYNC_FLAG_DAFS_EXTENSIONS
)) {
4626 MaybeAskOnline(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4628 struct VolumeDiskHeader diskHdr
;
4630 code
= VReadVolumeDiskHeader(volumeId
, salvinfo
->fileSysPartition
, &diskHdr
);
4632 /* volume probably does not exist; no need to bring back online */
4635 AskOnline(salvinfo
, volumeId
);
4639 AskOnline(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4643 for (i
= 0; i
< 3; i
++) {
4644 code
= FSYNC_VolOp(volumeId
, salvinfo
->fileSysPartition
->name
,
4645 FSYNC_VOL_ON
, FSYNC_WHATEVER
, NULL
);
4647 if (code
== SYNC_OK
) {
4649 } else if (code
== SYNC_DENIED
) {
4650 Log("AskOnline: file server denied online request to volume %u partition %s; trying again...\n", volumeId
, salvinfo
->fileSysPartition
->name
);
4651 } else if (code
== SYNC_BAD_COMMAND
) {
4652 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4654 Log("AskOnline: please make sure file server binaries are same version.\n");
4658 Log("AskOnline: request for fileserver to put volume online failed; trying again...\n");
4659 FSYNC_clientFinis();
4666 AskDelete(struct SalvInfo
*salvinfo
, VolumeId volumeId
)
4671 for (i
= 0; i
< 3; i
++) {
4672 memset(&res
, 0, sizeof(res
));
4673 code
= FSYNC_VolOp(volumeId
, salvinfo
->fileSysPartition
->name
,
4674 FSYNC_VOL_DONE
, FSYNC_SALVAGE
, &res
);
4676 if (code
== SYNC_OK
) {
4678 } else if (code
== SYNC_DENIED
) {
4679 Log("AskOnline: file server denied DONE request to volume %u partition %s; trying again...\n", volumeId
, salvinfo
->fileSysPartition
->name
);
4680 } else if (code
== SYNC_BAD_COMMAND
) {
4681 Log("AskOnline: fssync protocol mismatch (bad command word '%d')\n",
4684 #ifdef AFS_DEMAND_ATTACH_FS
4685 Log("AskOnline: please make sure dafileserver, davolserver, salvageserver and dasalvager binaries are same version.\n");
4687 Log("AskOnline: fileserver is DAFS but we are not.\n");
4690 #ifdef AFS_DEMAND_ATTACH_FS
4691 Log("AskOnline: fileserver is not DAFS but we are.\n");
4693 Log("AskOnline: please make sure fileserver, volserver and salvager binaries are same version.\n");
4697 } else if (code
== SYNC_FAILED
&&
4698 (res
.hdr
.reason
== FSYNC_UNKNOWN_VOLID
||
4699 res
.hdr
.reason
== FSYNC_WRONG_PART
)) {
4700 /* volume is already effectively 'deleted' */
4704 Log("AskOnline: request for fileserver to delete volume failed; trying again...\n");
4705 FSYNC_clientFinis();
4712 CopyInode(Device device
, Inode inode1
, Inode inode2
, int rwvolume
)
4714 /* Volume parameter is passed in case iopen is upgraded in future to
4715 * require a volume Id to be passed
4718 IHandle_t
*srcH
, *destH
;
4719 FdHandle_t
*srcFdP
, *destFdP
;
4721 afs_foff_t size
= 0;
4723 IH_INIT(srcH
, device
, rwvolume
, inode1
);
4724 srcFdP
= IH_OPEN(srcH
);
4725 osi_Assert(srcFdP
!= NULL
);
4726 IH_INIT(destH
, device
, rwvolume
, inode2
);
4727 destFdP
= IH_OPEN(destH
);
4728 while ((nBytes
= FDH_PREAD(srcFdP
, buf
, sizeof(buf
), size
)) > 0) {
4729 osi_Assert(FDH_PWRITE(destFdP
, buf
, nBytes
, size
) == nBytes
);
4732 osi_Assert(nBytes
== 0);
4733 FDH_REALLYCLOSE(srcFdP
);
4734 FDH_REALLYCLOSE(destFdP
);
4741 PrintInodeList(struct SalvInfo
*salvinfo
)
4743 struct ViceInodeInfo
*ip
;
4744 struct ViceInodeInfo
*buf
;
4745 struct afs_stat status
;
4749 osi_Assert(afs_fstat(salvinfo
->inodeFd
, &status
) == 0);
4750 buf
= (struct ViceInodeInfo
*)malloc(status
.st_size
);
4751 osi_Assert(buf
!= NULL
);
4752 nInodes
= status
.st_size
/ sizeof(struct ViceInodeInfo
);
4753 osi_Assert(read(salvinfo
->inodeFd
, buf
, status
.st_size
) == status
.st_size
);
4754 for (ip
= buf
; nInodes
--; ip
++) {
4755 Log("Inode:%s, linkCount=%d, size=%#llx, p=(%u,%u,%u,%u)\n",
4756 PrintInode(stmp
, ip
->inodeNumber
), ip
->linkCount
,
4757 (afs_uintmax_t
) ip
->byteCount
, ip
->u
.param
[0], ip
->u
.param
[1],
4758 ip
->u
.param
[2], ip
->u
.param
[3]);
4764 PrintInodeSummary(struct SalvInfo
*salvinfo
)
4767 struct InodeSummary
*isp
;
4769 for (i
= 0; i
< salvinfo
->nVolumesInInodeFile
; i
++) {
4770 isp
= &salvinfo
->inodeSummary
[i
];
4771 Log("VID:%u, RW:%u, index:%d, nInodes:%d, nSpecialInodes:%d, maxUniquifier:%u, volSummary\n", isp
->volumeId
, isp
->RWvolumeId
, isp
->index
, isp
->nInodes
, isp
->nSpecialInodes
, isp
->maxUniquifier
);
4781 osi_Assert(0); /* Fork is never executed in the NT code path */
4785 #ifdef AFS_DEMAND_ATTACH_FS
4786 if ((f
== 0) && (programType
== salvageServer
)) {
4787 /* we are a salvageserver child */
4788 #ifdef FSSYNC_BUILD_CLIENT
4789 VChildProcReconnectFS_r();
4791 #ifdef SALVSYNC_BUILD_CLIENT
4795 #endif /* AFS_DEMAND_ATTACH_FS */
4796 #endif /* !AFS_NT40_ENV */
4806 #ifdef AFS_DEMAND_ATTACH_FS
4807 if (programType
== salvageServer
) {
4808 /* release all volume locks before closing down our SYNC channels.
4809 * the fileserver may try to online volumes we have checked out when
4810 * we close down FSSYNC, so we should make sure we don't have those
4811 * volumes locked when it does */
4812 struct DiskPartition64
*dp
;
4814 for (i
= 0; i
<= VOLMAXPARTS
; i
++) {
4815 dp
= VGetPartitionById(i
, 0);
4817 VLockFileReinit(&dp
->volLockFile
);
4820 # ifdef SALVSYNC_BUILD_CLIENT
4823 # ifdef FSSYNC_BUILD_CLIENT
4827 #endif /* AFS_DEMAND_ATTACH_FS */
4830 if (main_thread
!= pthread_self())
4831 pthread_exit((void *)code
);
4844 pid
= wait(&status
);
4845 osi_Assert(pid
!= -1);
4846 if (WCOREDUMP(status
))
4847 Log("\"%s\" core dumped!\n", prog
);
4848 if (WIFSIGNALED(status
) != 0 || WEXITSTATUS(status
) != 0)
/*
 * Format clock as local time and return a pointer to the result.
 *
 * A non-zero precision yields "MM/DD/YYYY HH:MM:SS"; zero omits the
 * seconds.  The result lives in a static buffer, so every call overwrites
 * the previous value.  When the time cannot be converted or the formatted
 * text does not fit, an empty string is returned rather than passing a
 * NULL struct tm to strftime or exposing an unspecified buffer.
 */
char *
TimeStamp(time_t clock, int precision)
{
    static char timestamp[20];
    const char *layout = precision ? "%m/%d/%Y %H:%M:%S" : "%m/%d/%Y %H:%M";
    struct tm *lt = localtime(&clock);

    if (lt == NULL || strftime(timestamp, sizeof(timestamp), layout, lt) == 0) {
        timestamp[0] = '\0';
    }
    return timestamp;
}
4867 CheckLogFile(char * log_path
)
4869 char oldSlvgLog
[AFSDIR_PATH_MAX
];
4871 #ifndef AFS_NT40_ENV
4878 strcpy(oldSlvgLog
, log_path
);
4879 strcat(oldSlvgLog
, ".old");
4881 renamefile(log_path
, oldSlvgLog
);
4882 logFile
= afs_fopen(log_path
, "a");
4884 if (!logFile
) { /* still nothing, use stdout */
4888 #ifndef AFS_NAMEI_ENV
4889 AFS_DEBUG_IOPS_LOG(logFile
);
4894 #ifndef AFS_NT40_ENV
4896 TimeStampLogFile(char * log_path
)
4898 char stampSlvgLog
[AFSDIR_PATH_MAX
];
4903 lt
= localtime(&now
);
4904 (void)afs_snprintf(stampSlvgLog
, sizeof stampSlvgLog
,
4905 "%s.%04d-%02d-%02d.%02d:%02d:%02d",
4906 log_path
, lt
->tm_year
+ 1900,
4907 lt
->tm_mon
+ 1, lt
->tm_mday
, lt
->tm_hour
, lt
->tm_min
,
4910 /* try to link the logfile to a timestamped filename */
4911 /* if it fails, oh well, nothing we can do */
4912 if (link(log_path
, stampSlvgLog
))
4922 #ifndef AFS_NT40_ENV
4924 printf("Can't show log since using syslog.\n");
4935 logFile
= afs_fopen(AFSDIR_SERVER_SLVGLOG_FILEPATH
, "r");
4938 printf("Can't read %s, exiting\n", AFSDIR_SERVER_SLVGLOG_FILEPATH
);
4941 while (fgets(line
, sizeof(line
), logFile
))
4948 Log(const char *format
, ...)
4954 va_start(args
, format
);
4955 (void)afs_vsnprintf(tmp
, sizeof tmp
, format
, args
);
4957 #ifndef AFS_NT40_ENV
4959 syslog(LOG_INFO
, "%s", tmp
);
4963 gettimeofday(&now
, 0);
4964 fprintf(logFile
, "%s %s", TimeStamp(now
.tv_sec
, 1), tmp
);
4970 Abort(const char *format
, ...)
4975 va_start(args
, format
);
4976 (void)afs_vsnprintf(tmp
, sizeof tmp
, format
, args
);
4978 #ifndef AFS_NT40_ENV
4980 syslog(LOG_INFO
, "%s", tmp
);
4984 fprintf(logFile
, "%s", tmp
);
/*
 * Return a freshly malloc'ed copy of the NUL-terminated string s.
 * Allocation failure trips an assertion.
 */
char *
ToString(const char *s)
{
    size_t bytes = strlen(s) + 1;       /* include the terminator */
    char *copy = (char *)malloc(bytes);

    osi_Assert(copy != NULL);
    memcpy(copy, s, bytes);
    return copy;
}
5005 /* Remove the FORCESALVAGE file */
5007 RemoveTheForce(char *path
)
5010 struct afs_stat force
; /* so we can use afs_stat to find it */
5011 strcpy(target
,path
);
5012 strcat(target
,"/FORCESALVAGE");
5013 if (!Testing
&& ForceSalvage
) {
5014 if (afs_stat(target
,&force
) == 0) unlink(target
);
5018 #ifndef AFS_AIX32_ENV
5020 * UseTheForceLuke - see if we can use the force
5023 UseTheForceLuke(char *path
)
5025 struct afs_stat force
;
5027 strcpy(target
,path
);
5028 strcat(target
,"/FORCESALVAGE");
5030 return (afs_stat(target
, &force
) == 0);
/*
 * UseTheForceLuke - see if we can use the force
 *
 * NOTE:
 *      The VRMIX fsck will not muck with the filesystem it is supposedly
 *      fixing and create a "FORCESALVAGE" file (by design).  Instead, we
 *      muck directly with the root inode.
 *      ListViceInodes() has a side effect of setting ForceSalvage if
 *      it detects a need, based on root inode examination.
 *
 * This variant therefore never reports a FORCESALVAGE file.
 */
int
UseTheForceLuke(char *path)
{
    (void)path;                 /* not consulted in this variant */

    return 0;                   /* sorry OB1 */
}
5053 /* NT support routines */
5055 static char execpathname
[MAX_PATH
];
5057 nt_SalvagePartition(char *partName
, int jobn
)
5062 if (!*execpathname
) {
5063 n
= GetModuleFileName(NULL
, execpathname
, MAX_PATH
- 1);
5064 if (!n
|| n
== 1023)
5067 job
.cj_magic
= SALVAGER_MAGIC
;
5068 job
.cj_number
= jobn
;
5069 (void)strcpy(job
.cj_part
, partName
);
5070 pid
= (int)spawnprocveb(execpathname
, save_args
, NULL
, &job
, sizeof(job
));
5075 nt_SetupPartitionSalvage(void *datap
, int len
)
5077 childJob_t
*jobp
= (childJob_t
*) datap
;
5078 char logname
[AFSDIR_PATH_MAX
];
5080 if (len
!= sizeof(childJob_t
))
5082 if (jobp
->cj_magic
!= SALVAGER_MAGIC
)
5087 (void)sprintf(logname
, "%s.%d", AFSDIR_SERVER_SLVGLOG_FILEPATH
,
5089 logFile
= afs_fopen(logname
, "w");
5097 #endif /* AFS_NT40_ENV */