2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 #include <afsconfig.h>
15 #include <afs/com_err.h>
16 #include <afs/bubasics.h>
18 #include <afs/tcdata.h>
22 #include "error_macros.h"
23 #include "bucoord_internal.h"
24 #include "bucoord_prototypes.h"
/*
 * SET_FLAG ORs the given bits into curPollPtr->flags; CLEAR_FLAG masks
 * them out.  Both expand to code that names curPollPtr directly, so they
 * can only be used where a variable of that name is in scope.
 * NOTE(review): the remaining lines of each macro body are not visible in
 * this excerpt - confirm what else the expansions do before reusing them.
 */
26 #define SET_FLAG(set) \
28 curPollPtr->flags |= (set); \
31 #define CLEAR_FLAG(clear) \
33 curPollPtr->flags &= ~(clear); \
36 extern struct bc_config
*bc_globalConfig
;
/*
 * bc_globalConfig (above) and bc_GetConn (below) are only declared here.
 * statusWatcher() calls bc_GetConn() with bc_globalConfig, the job's port
 * and the address of its rx_connection pointer.
 */
37 extern afs_int32
bc_GetConn(struct bc_config
*aconfig
, afs_int32 aport
, struct rx_connection
**tconn
);
39 /* globals for backup coordinator status management */
41 dlqlinkT statusHead
; /* chain of status blocks */
42 struct Lock statusQueueLock
; /* access control for status chain */
43 struct Lock cmdLineLock
; /* lock on the cmdLine */
45 afs_int32 lastTaskCode
; /* Error code from task that last finished */
48 * get next item for status interrogation, if any.
/*
 * nextItem
 *	Pick the status entry to examine after linkPtr.
 *
 * linkPtr is reinterpreted as a dlqlinkP and used as the starting
 * position; the link that is finally selected is returned cast back to
 * statusP.
 *
 * NOTE(review): the statements that reset and advance ptr, and the one
 * executed when ptr == &statusHead, are not visible in this excerpt.
 * statusWatcher() compares the result with 0, so presumably 0 is returned
 * once the walk is back at the list head - confirm.
 */
51 nextItem(statusP linkPtr
)
55 ptr
= (dlqlinkP
) linkPtr
;
57 /* if last known item has terminated, reset ptr */
66 /* if we're back at the head again */
67 if (ptr
== &statusHead
)
69 return ((statusP
) ptr
);
/*
 * NOTE(review): the signature line for this routine is not visible in this
 * excerpt.  The visible statements repeat those of nextItem(): start from
 * linkPtr viewed as a dlqlinkP, test for being back at &statusHead, and
 * return the link as a statusP.  Here the head test opens a braced block
 * whose contents are not shown - confirm how the two routines differ
 * before changing either.
 */
79 ptr
= (dlqlinkP
) linkPtr
;
81 /* if last known item has terminated, reset ptr */
90 /* if we're back at the head again */
91 if (ptr
== &statusHead
) {
94 return ((statusP
) ptr
);
/*
 * cmdDispatch
 *	Parse the pending command line and hand it to doDispatch().
 *
 * Takes the string currently held in cmdLine, splits it with
 * cmd_ParseLine() into at most MAXV arguments, frees the string and
 * dispatches the parsed arguments with a dispatch count of 1.
 * statusWatcher() takes the cmdLine lock before starting this routine and
 * expects it to be released here.
 *
 * unused: not referenced by the visible code.
 *
 * NOTE(review): the declaration of code, the test on the parse result,
 * the unlock of cmdLine and the return statements are not visible in this
 * excerpt.
 */
101 cmdDispatch(void *unused
)
104 char *targv
[MAXV
]; /*Ptr to parsed argv stuff */
105 afs_int32 targc
; /*Num parsed arguments */
107 char *internalCmdLine
;
109 internalCmdLine
= cmdLine
;
112 code
= cmd_ParseLine(internalCmdLine
, targv
, &targc
, MAXV
);
/* FIX: end the diagnostic with a newline so it does not run into later output */
114 printf("Couldn't parse line: '%s'\n", afs_error_message(code
));
117 free(internalCmdLine
);
120 * Because the "-at" option cannot be wildcarded, we cannot fall
121 * into a recursive loop here by setting dispatchCount to 1.
123 doDispatch(targc
, targv
, 1);
/*
 * statusWatcher
 *	Walk the status chain and bring each job's local status up to date.
 *
 * For the entry returned by nextItem() the visible code:
 *   - sleeps 5 seconds when there is no entry to examine;
 *   - snapshots flags, taskId, port, scheduledDump and jobNumber;
 *   - drops (or, for NOREMOVE entries, marks) a scheduled job whose abort
 *     was requested before it started;
 *   - starts a scheduled dump whose time has come by handing its command
 *     line to cmdDispatch;
 *   - otherwise contacts the TC on the entry's port, forwards a pending
 *     abort request, fetches the task status, copies it into the entry,
 *     reports completion and ends the task on the server.
 *
 * unused: not referenced by the visible code.
 *
 * NOTE(review): the loop construct, several declarations (code, taskId,
 * port, atTime, jobNumber, dispatchPid), most error tests and the
 * lock/unlock calls are not visible in this excerpt.
 */
129 statusWatcher(void *unused
)
131 struct rx_connection
*tconn
= NULL
;
132 statusP curPollPtr
= 0;
134 struct tciStatusS statusPtr
;
136 /* task information */
137 afs_uint32 taskFlags
;
138 afs_uint32 localTaskFlags
;
139 afs_uint32 temp
; /* for flag manipulation */
152 rx_DestroyConnection(tconn
);
156 curPollPtr
= nextItem(curPollPtr
);
158 if (curPollPtr
== 0) {
/*
 * FIX: the timespec is now declared as delayTime, matching the uses
 * below.  It was spelled delaytime, which leaves delayTime undeclared
 * when AFS_PTHREAD_ENV is defined.
 */
159 #ifdef AFS_PTHREAD_ENV
160 struct timespec delayTime
;
162 delayTime
.tv_sec
= 5;
163 delayTime
.tv_nsec
= 0;
164 pthread_delay_np(&delayTime
);
167 IOMGR_Sleep(5); /* wait a while */
168 #endif /*else AFS_PTHREAD_ENV */
172 /* save useful information */
173 localTaskFlags
= curPollPtr
->flags
;
174 taskId
= curPollPtr
->taskId
;
175 port
= curPollPtr
->port
;
176 atTime
= curPollPtr
->scheduledDump
;
177 jobNumber
= curPollPtr
->jobNumber
;
180 /* reset certain flags; local kill; */
181 CLEAR_FLAG(ABORT_LOCAL
);
183 /* An abort request before the command even started */
184 if (atTime
&& (localTaskFlags
& ABORT_REQUEST
)) {
185 if (localTaskFlags
& NOREMOVE
) {
186 curPollPtr
->flags
|= (STARTING
| ABORT_DONE
); /* Will ignore on other passes */
187 curPollPtr
->scheduledDump
= 0;
189 deleteStatusNode(curPollPtr
);
195 /* A task not started yet - check its start time */
196 if (localTaskFlags
& STARTING
|| atTime
) {
198 * Start a timed dump if its time has come. When the job is
199 * started, it will allocate its own status structure so this
200 * one is no longer needed: delete it.
202 * Avoid multiple processes trouncing the cmdLine by placing
205 if (atTime
&& (atTime
<= time(0))) {
206 lock_cmdLine(); /* Will unlock in cmdDispatch */
208 cmdLine
= curPollPtr
->cmdLine
;
210 curPollPtr
->cmdLine
= 0;
213 printf("Starting scheduled dump: job %d\n", jobNumber
);
214 printf("schedD> %s\n", cmdLine
);
217 LWP_CreateProcess(cmdDispatch
, 16384, LWP_NORMAL_PRIORITY
,
218 (void *)2, "cmdDispatch", &dispatchPid
);
223 printf("Couldn't create cmdDispatch task\n");
226 if (localTaskFlags
& NOREMOVE
) {
227 curPollPtr
->flags
|= STARTING
; /* Will ignore on other passes */
228 curPollPtr
->flags
|= (code
? TASK_ERROR
: TASK_DONE
);
229 curPollPtr
->scheduledDump
= 0;
231 deleteStatusNode(curPollPtr
);
238 if (localTaskFlags
& ABORT_LOCAL
) {
239 /* kill the local task */
/*
 * NOTE(review): the message below says "in contact" but this branch is
 * taken when CONTACT_LOST is set - confirm the sense of the test.
 */
240 if ((localTaskFlags
& CONTACT_LOST
) != 0) {
241 printf("Job %d: in contact with butc at port %d\n", jobNumber
,
243 printf("Job %d cont: Local kill ignored - use normal kill\n",
248 code
= (afs_int32
) bc_GetConn(bc_globalConfig
, port
, &tconn
);
250 SET_FLAG(CONTACT_LOST
);
254 if (CheckTCVersion(tconn
)) {
255 SET_FLAG(CONTACT_LOST
);
259 /* Send abort request to the TC if we have to */
260 if (localTaskFlags
& ABORT_REQUEST
) {
261 code
= TC_RequestAbort(tconn
, taskId
);
263 afs_com_err("statusWatcher", code
, "; Can't post abort request");
264 afs_com_err("statusWatcher", 0, "...Deleting job");
265 if (localTaskFlags
& NOREMOVE
) {
266 curPollPtr
->flags
|= (STARTING
| TASK_ERROR
);
267 curPollPtr
->scheduledDump
= 0;
269 deleteStatusNode(curPollPtr
);
275 curPollPtr
->flags
&= ~ABORT_REQUEST
;
276 curPollPtr
->flags
|= ABORT_SENT
;
281 /* otherwise just get the status */
282 code
= TC_GetStatus(tconn
, taskId
, &statusPtr
);
284 if (code
== TC_NODENOTFOUND
) {
285 printf("Job %d: %s - no such task on port %d, deleting\n",
286 jobNumber
, curPollPtr
->taskName
, port
);
288 if (localTaskFlags
& NOREMOVE
) {
289 curPollPtr
->flags
|= (STARTING
| TASK_ERROR
);
290 curPollPtr
->scheduledDump
= 0;
292 deleteStatusNode(curPollPtr
); /* delete this status node */
298 SET_FLAG(CONTACT_LOST
);
302 /* in case we previously lost contact or couldn't find */
303 CLEAR_FLAG(CONTACT_LOST
);
305 /* extract useful status */
306 taskFlags
= statusPtr
.flags
;
308 /* update local status */
311 /* remember some status flags in local struct */
313 (DRIVE_WAIT
| OPR_WAIT
| CALL_WAIT
| TASK_DONE
| ABORT_DONE
|
315 curPollPtr
->flags
&= ~temp
; /* clear */
316 curPollPtr
->flags
|= (taskFlags
& temp
); /* update */
318 curPollPtr
->dbDumpId
= statusPtr
.dbDumpId
;
319 curPollPtr
->nKBytes
= statusPtr
.nKBytes
;
/*
 * NOTE(review): unbounded copy of a name received from the TC; verify that
 * the destination is at least as large as statusPtr.volumeName.
 */
320 strcpy(curPollPtr
->volumeName
, statusPtr
.volumeName
);
321 curPollPtr
->volsFailed
= statusPtr
.volsFailed
;
322 curPollPtr
->lastPolled
= statusPtr
.lastPolled
;
326 if (taskFlags
& TASK_DONE
) { /*done */
327 if (taskFlags
& ABORT_DONE
) {
328 if (curPollPtr
->dbDumpId
)
329 printf("Job %d: %s: DumpID %u Aborted", jobNumber
,
330 curPollPtr
->taskName
, curPollPtr
->dbDumpId
);
332 printf("Job %d: %s Aborted", jobNumber
,
333 curPollPtr
->taskName
);
335 if (taskFlags
& TASK_ERROR
)
336 printf(" with errors\n");
343 else if (taskFlags
& TASK_ERROR
) {
344 if (!(localTaskFlags
& SILENT
)) {
345 if (curPollPtr
->dbDumpId
)
346 printf("Job %d: DumpID %u Failed with errors\n",
347 jobNumber
, curPollPtr
->dbDumpId
);
349 printf("Job %d Failed with errors\n", jobNumber
);
355 if (!(localTaskFlags
& SILENT
)) {
356 if (curPollPtr
->dbDumpId
)
357 printf("Job %d: %s: DumpID %u finished", jobNumber
,
358 curPollPtr
->taskName
, curPollPtr
->dbDumpId
);
360 printf("Job %d: %s finished", jobNumber
,
361 curPollPtr
->taskName
);
363 if (curPollPtr
->volsTotal
) {
364 printf(". %d volumes dumped",
365 (curPollPtr
->volsTotal
-
366 curPollPtr
->volsFailed
));
367 if (curPollPtr
->volsFailed
)
368 printf(", %d failed", curPollPtr
->volsFailed
);
376 /* make call to destroy task on server */
377 code
= TC_EndStatus(tconn
, taskId
);
379 printf("Job %d: %s, error in job termination cleanup\n",
380 jobNumber
, curPollPtr
->taskName
);
382 if (localTaskFlags
& NOREMOVE
) {
383 curPollPtr
->flags
|= STARTING
;
384 curPollPtr
->scheduledDump
= 0;
386 deleteStatusNode(curPollPtr
); /* unlink and destroy local task */
395 * Allocate a job number. Computes the maximum of all the job numbers
396 * and then returns the maximum+1.
397 * If no jobs are found, returns 1.
/*
 * retval starts at 0 and is raised to the largest jobNumber seen while
 * walking the chain from statusHead.dlq_next until the walk is back at
 * &statusHead.
 * NOTE(review): the signature, the step that advances ptr and the return
 * statement are not visible in this excerpt.
 */
403 afs_int32 retval
= 0;
406 ptr
= statusHead
.dlq_next
;
407 while (ptr
!= &statusHead
) {
408 /* compute max of all job numbers */
409 if (((statusP
) ptr
)->jobNumber
> retval
)
410 retval
= ((statusP
) ptr
)->jobNumber
;
419 * Wait for a specific task to finish and then return.
420 * Return the task's flags when it's done. If the job
421 * had been cleaned up, then just return 0.
/*
 * waitForTask
 *	Poll until the job identified by taskId is gone or has finished.
 *
 * Each pass sleeps for 2 seconds and then looks the job up with
 * findStatus(taskId).  The wait ends when no status node is found or when
 * the node has any of TASK_DONE, ABORT_DONE or TASK_ERROR set; rcode is
 * then the node's flags, or 0 when there is no node.
 *
 * NOTE(review): the loop construct, the declaration of ptr, the non-pthread
 * sleep call, the status locking and the return statement are not visible
 * in this excerpt.
 */
424 waitForTask(afs_uint32 taskId
)
427 afs_int32 done
= 0, rcode
= 0, t
;
429 t
= (TASK_DONE
| ABORT_DONE
| TASK_ERROR
);
431 /* Sleep 2 seconds */
/*
 * FIX: the timespec is now declared as delayTime, matching the uses
 * below.  It was spelled delaytime, which leaves delayTime undeclared
 * when AFS_PTHREAD_ENV is defined.
 */
432 #ifdef AFS_PTHREAD_ENV
433 struct timespec delayTime
;
434 delayTime
.tv_sec
= 2;
435 delayTime
.tv_nsec
= 0;
436 pthread_delay_np(&delayTime
);
439 #endif /*else AFS_PTHREAD_ENV */
441 /* Check if we are done */
443 ptr
= findStatus(taskId
);
444 if (!ptr
|| (ptr
->flags
& t
)) {
445 rcode
= (ptr
? ptr
->flags
: 0);