Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / ibm-public / postfix / dist / src / util / watchdog.c
blob7ae2c571c1854bce0b411612073cbcc9cbd44ea9
1 /* $NetBSD$ */
3 /*++
4 /* NAME
5 /* watchdog 3
6 /* SUMMARY
7 /* watchdog timer
8 /* SYNOPSIS
9 /* #include <watchdog.h>
11 /* WATCHDOG *watchdog_create(timeout, action, context)
12 /* unsigned timeout;
13 /* void (*action)(WATCHDOG *watchdog, char *context);
14 /* char *context;
16 /* void watchdog_start(watchdog)
17 /* WATCHDOG *watchdog;
19 /* void watchdog_stop(watchdog)
20 /* WATCHDOG *watchdog;
22 /* void watchdog_destroy(watchdog)
23 /* WATCHDOG *watchdog;
25 /* void watchdog_pat()
26 /* DESCRIPTION
27 /* This module implements watchdog timers that are based on ugly
28 /* UNIX alarm timers. The module is designed to survive systems
29 /* with clocks that jump occasionally.
31 /* Watchdog timers can be stacked. Only one watchdog timer can be
32 /* active at a time. Only the last created watchdog timer can be
33 /* manipulated. Watchdog timers must be destroyed in reverse order
34 /* of creation.
36 /* watchdog_create() suspends the current watchdog timer, if any,
37 /* and instantiates a new watchdog timer.
39 /* watchdog_start() starts or restarts the watchdog timer.
41 /* watchdog_stop() stops the watchdog timer.
43 /* watchdog_destroy() stops the watchdog timer, and resumes the
44 /* watchdog timer instance that was suspended by watchdog_create().
46 /* watchdog_pat() pats the watchdog, so it stays quiet.
48 /* Arguments:
49 /* .IP timeout
50 /* The watchdog time limit. When the watchdog timer runs, the
51 /* process must invoke watchdog_start(), watchdog_stop(),
52 /* watchdog_pat() or watchdog_destroy() before the time limit is reached.
53 /* .IP action
54 /* A null pointer, or pointer to function that is called when the
55 /* watchdog alarm goes off. The default action is to terminate
56 /* the process with a fatal error.
57 /* .IP context
58 /* Application context that is passed to the action routine.
59 /* .IP watchdog
60 /* Must be a pointer to the most recently created watchdog instance.
61 /* This argument is checked upon each call.
62 /* BUGS
63 /* UNIX alarm timers are not stackable, so there can be at most one
64 /* watchdog instance active at any given time.
65 /* SEE ALSO
66 /* msg(3) diagnostics interface
67 /* DIAGNOSTICS
68 /* Fatal errors: memory allocation problem, system call failure.
69 /* Panics: interface violations.
70 /* LICENSE
71 /* .ad
72 /* .fi
73 /* The Secure Mailer license must be distributed with this software.
74 /* AUTHOR(S)
75 /* Wietse Venema
76 /* IBM T.J. Watson Research
77 /* P.O. Box 704
78 /* Yorktown Heights, NY 10598, USA
79 /*--*/
81 /* System library. */
83 #include <sys_defs.h>
84 #include <unistd.h>
85 #include <signal.h>
86 #include <posix_signals.h>
88 /* Utility library. */
90 #include <msg.h>
91 #include <mymalloc.h>
92 #include <killme_after.h>
93 #include <watchdog.h>
95 /* Application-specific. */
98 * Rather than having one timer that goes off when it is too late, we break
99 * up the time limit into smaller intervals so that we can deal with clocks
100 * that jump occasionally.
102 #define WATCHDOG_STEPS 3
105 * UNIX alarms are not stackable, but we can save and restore state, so that
106 * watchdogs can at least be nested, sort of.
108 struct WATCHDOG {
109 unsigned timeout; /* our time resolution */
110 WATCHDOG_FN action; /* application routine */
111 char *context; /* application context */
112 int trip_run; /* number of successive timeouts */
113 WATCHDOG *saved_watchdog; /* saved state */
114 struct sigaction saved_action; /* saved state */
115 unsigned saved_time; /* saved state */
119 * However, only one watchdog instance can be current, and the caller has to
120 * restore state before a prior watchdog instance can be manipulated.
122 static WATCHDOG *watchdog_curr;
124 /* watchdog_event - handle timeout event */
126 static void watchdog_event(int unused_sig)
128 const char *myname = "watchdog_event";
129 WATCHDOG *wp;
132 * This routine runs as a signal handler. We should not do anything that
133 * could involve memory allocation/deallocation, but exiting without
134 * proper explanation would be unacceptable. For this reason, msg(3) was
135 * made safe for usage by signal handlers that terminate the process.
137 if ((wp = watchdog_curr) == 0)
138 msg_panic("%s: no instance", myname);
139 if (msg_verbose > 1)
140 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
141 if (++(wp->trip_run) < WATCHDOG_STEPS) {
142 alarm(wp->timeout);
143 } else {
144 if (wp->action)
145 wp->action(wp, wp->context);
146 else {
147 killme_after(5);
148 #ifdef TEST
149 pause();
150 #endif
151 msg_fatal("watchdog timeout");
156 /* watchdog_create - create watchdog instance */
158 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
160 const char *myname = "watchdog_create";
161 struct sigaction sig_action;
162 WATCHDOG *wp;
164 wp = (WATCHDOG *) mymalloc(sizeof(*wp));
165 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
166 msg_panic("%s: timeout %d is too small", myname, timeout);
167 wp->action = action;
168 wp->context = context;
169 wp->saved_watchdog = watchdog_curr;
170 wp->saved_time = alarm(0);
171 sigemptyset(&sig_action.sa_mask);
172 #ifdef SA_RESTART
173 sig_action.sa_flags = SA_RESTART;
174 #else
175 sig_action.sa_flags = 0;
176 #endif
177 sig_action.sa_handler = watchdog_event;
178 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
179 msg_fatal("%s: sigaction(SIGALRM): %m", myname);
180 if (msg_verbose > 1)
181 msg_info("%s: %p %d", myname, (void *) wp, timeout);
182 return (watchdog_curr = wp);
185 /* watchdog_destroy - destroy watchdog instance, restore state */
187 void watchdog_destroy(WATCHDOG *wp)
189 const char *myname = "watchdog_destroy";
191 watchdog_stop(wp);
192 watchdog_curr = wp->saved_watchdog;
193 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
194 msg_fatal("%s: sigaction(SIGALRM): %m", myname);
195 if (wp->saved_time)
196 alarm(wp->saved_time);
197 myfree((char *) wp);
198 if (msg_verbose > 1)
199 msg_info("%s: %p", myname, (void *) wp);
202 /* watchdog_start - enable watchdog timer */
204 void watchdog_start(WATCHDOG *wp)
206 const char *myname = "watchdog_start";
208 if (wp != watchdog_curr)
209 msg_panic("%s: wrong watchdog instance", myname);
210 wp->trip_run = 0;
211 alarm(wp->timeout);
212 if (msg_verbose > 1)
213 msg_info("%s: %p", myname, (void *) wp);
216 /* watchdog_stop - disable watchdog timer */
218 void watchdog_stop(WATCHDOG *wp)
220 const char *myname = "watchdog_stop";
222 if (wp != watchdog_curr)
223 msg_panic("%s: wrong watchdog instance", myname);
224 alarm(0);
225 if (msg_verbose > 1)
226 msg_info("%s: %p", myname, (void *) wp);
229 /* watchdog_pat - pat the dog so it stays quiet */
231 void watchdog_pat(void)
233 const char *myname = "watchdog_pat";
235 if (watchdog_curr)
236 watchdog_curr->trip_run = 0;
237 if (msg_verbose > 1)
238 msg_info("%s: %p", myname, (void *) watchdog_curr);
#ifdef TEST

#include <vstream.h>

 /*
  * Stand-alone test driver. It creates a 10-second watchdog with the default
  * action, starts it, and pats it once up front and again after every
  * character read from standard input. At end of input the watchdog is
  * destroyed and the program exits normally.
  */
int     main(int unused_argc, char **unused_argv)
{
    WATCHDOG *dog;

    msg_verbose = 2;

    dog = watchdog_create(10, (WATCHDOG_FN) 0, (char *) 0);
    watchdog_start(dog);
    for (;;) {
        watchdog_pat();
        if (VSTREAM_GETCHAR() == VSTREAM_EOF)
            break;
    }
    watchdog_destroy(dog);
    return (0);
}

#endif