dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / cmd / picl / plugins / sun4u / snowbird / watchdog / piclwatchdog.c
blob00af353d0e1b564732c176bab7b6181908235114
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
30 * This module is used to monitor and control watchdog timer for
31 * UltraSPARC-IIi CPU in Snowbird
34 #include <stdio.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <strings.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <alloca.h>
43 #include <limits.h>
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <libintl.h>
47 #include <syslog.h>
48 #include <locale.h>
49 #include <picl.h>
50 #include <picltree.h>
51 #include <libnvpair.h>
52 #include <poll.h>
53 #include <errno.h>
54 #include <syslog.h>
55 #include <sys/priocntl.h>
56 #include <sys/rtpriocntl.h>
57 #include <sys/tspriocntl.h>
58 #include <sys/fsspriocntl.h>
59 #include <stropts.h>
60 #include <synch.h>
61 #include <signal.h>
62 #include <thread.h>
63 #include <picldefs.h>
64 #include <smclib.h>
65 #include "piclwatchdog.h"
67 #pragma init(wd_picl_register) /* init section */
69 /* debug variables */
70 static int wd_debug = 0;
71 static hrtime_t start1, end1;
72 static int count = 0;
73 typedef struct { /* used to keep track of time taken for last 5 pats */
74 int res_seq;
75 int req_seq;
76 int64_t time;
77 } wd_time_t;
79 #define NUMBER_OF_READINGS 5
80 static wd_time_t time1[NUMBER_OF_READINGS];
82 /* global declarations */
83 static int wd_fd = -1; /* fd used to send watchdog commands */
84 static int polling_fd = -1; /* polling thread that snoops for events */
85 static int wd_enable = 1;
86 static int state_configured = 0; /* chassis state */
87 static int props_created = 0;
88 static int wd_pat_thr_priority = -1;
89 static pid_t pid = -1; /* PID that owns watchdog services */
90 static cond_t patting_cv;
91 static mutex_t data_lock;
92 static mutex_t patting_lock;
93 static int32_t pat_time = 0;
94 static thread_t polling_thr_tid;
95 static thread_t patting_thr_tid;
96 static wd_data_t wd_data;
97 static char wd_conf[MAXPATHLEN];
99 #define NULLREAD (int (*)(ptree_rarg_t *, void *))0
100 #define NULLWRITE (int (*)(ptree_warg_t *, const void *))0
102 /* ptree interface */
103 static void wd_picl_register(void);
104 static void wd_picl_init(void);
105 static void wd_picl_fini(void);
106 static void wd_state_change_evhandler(const char *,
107 const void *, size_t, void *);
109 /* local functions */
110 static int wd_write_timeout(ptree_warg_t *, const void *);
111 static int wd_write_action(ptree_warg_t *, const void *);
112 static int wd_read_action(ptree_rarg_t *, void *);
113 static int wd_read_timeout(ptree_rarg_t *, void *);
114 extern char *strtok_r(char *s1, const char *s2, char **lasts);
115 extern int wd_get_chassis_type();
117 static picld_plugin_reg_t wd_reg_info = {
118 PICLD_PLUGIN_VERSION_1,
119 PICLD_PLUGIN_CRITICAL,
120 "SUNW_picl_watchdog",
121 wd_picl_init,
122 wd_picl_fini,
126 * This function parses wd.conf file to set the tunables
127 * tunables at present: patting thread priority, pat time, wd_enable
129 static void
130 wd_parse_config_file(char *wd_conf)
132 FILE *fp;
133 char buf[WD_CONF_MAXSIZE];
134 char *token, *last, *value;
136 if ((fp = fopen(wd_conf, "r")) == NULL) {
137 return;
140 while (fgets(buf, sizeof (buf), fp) != NULL) {
141 if (buf[0] == '\0' || buf[0] == '#') {
142 continue;
144 token = last = value = NULL;
145 value = (char *)strtok_r((char *)buf, WD_DELIMETER, &last);
146 if (last) {
147 token = (char *)strtok_r(last, WD_DELIMETER, &last);
148 } else {
149 continue;
152 if (value == NULL || token == NULL) {
153 continue;
155 if (strcmp(token, WD_PAT_THREAD_PRIORITY) == 0) {
156 wd_pat_thr_priority = strtol(value,
157 (char **)NULL, 10);
158 } else if (strcmp(token, WD_PATTING_TIME) == 0) {
159 errno = 0;
160 pat_time = strtol(value, (char **)NULL, 10);
161 if (errno != 0) {
162 pat_time = 0;
164 } else if (strcmp(token, WD_ENABLE) == 0) {
165 if (strcmp(value, "false") == 0) {
166 wd_enable = 0;
168 } else { /* unknown token */
169 continue;
172 (void) fclose(fp);
176 * read the SMC watchdog registers
178 static int
179 wd_get_reg_dump(uint8_t buffer[])
181 int rc = 0, i;
182 sc_reqmsg_t req_pkt;
183 sc_rspmsg_t rsp_pkt;
185 /* initialize the request packet */
186 (void) smc_init_smc_msg(&req_pkt, SMC_GET_WATCHDOG_TIMER,
187 DEFAULT_SEQN, 0);
189 /* make a call to smc library to send cmd */
190 if ((rc = smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt,
191 WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
192 WD_DEBUG1(WD_PICL_GET_ERR, rc);
193 return (PICL_FAILURE);
196 /* read 8 bytes */
197 bzero(buffer, WD_REGISTER_LEN);
198 for (i = 0; i < WD_REGISTER_LEN; i++) {
199 buffer[i] = rsp_pkt.data[i];
201 return (PICL_SUCCESS);
205 * get the HEALTHY# line state
206 * Return -1 for Error
207 * 0 for HEALTHY# down
208 * 1 for HEALTHY# up
210 static int
211 wd_get_healthy_status()
213 sc_reqmsg_t req_pkt;
214 sc_rspmsg_t rsp_pkt;
216 /* initialize the request packet */
217 (void) smc_init_smc_msg(&req_pkt, SMC_GET_EXECUTION_STATE,
218 DEFAULT_SEQN, 0);
220 /* make a call to smc library to send cmd */
221 if (smc_send_msg(DEFAULT_FD, &req_pkt, &rsp_pkt,
222 WD_POLL_TIMEOUT) != SMC_SUCCESS) {
223 return (-1);
226 return ((rsp_pkt.data[0] & IS_HEALTHY) ? WD_HEALTHY_UP :
227 WD_HEALTHY_DOWN);
/*
 * Completion callback given to ptree_post_event() by
 * post_wd_state_event(): releases the duplicated event name and the packed
 * nvlist buffer that were allocated for the event.  size is unused.
 */
/*ARGSUSED*/
static void
event_completion_handler(char *ename, void *earg, size_t size)
{
	free(earg);
	free(ename);
}
239 * posts picl-state-change event if there is change in watchdog-timer state
241 static picl_errno_t
242 post_wd_state_event(picl_nodehdl_t nodeh, char *state)
244 nvlist_t *nvl;
245 size_t nvl_size;
246 char *pack_buf = NULL;
247 picl_errno_t rc;
248 char *ename = PICLEVENT_STATE_CHANGE, *evname = NULL;
250 if (state == NULL) {
251 return (PICL_FAILURE);
254 if ((evname = strdup(ename)) == NULL) {
255 return (PICL_NOSPACE);
258 if ((rc = nvlist_alloc(&nvl, NV_UNIQUE_NAME_TYPE, NULL)) != 0) {
259 free(evname);
260 syslog(LOG_ERR, WD_NVLIST_ERR, rc);
261 return (PICL_FAILURE);
264 if ((rc = nvlist_add_uint64(nvl, PICLEVENTARG_NODEHANDLE,
265 nodeh)) != 0) {
266 nvlist_free(nvl);
267 free(evname);
268 syslog(LOG_ERR, WD_NVLIST_ERR, rc);
269 return (PICL_FAILURE);
272 if ((rc = nvlist_add_string(nvl, PICLEVENTARG_STATE,
273 state)) != 0) {
274 nvlist_free(nvl);
275 free(evname);
276 syslog(LOG_ERR, WD_NVLIST_ERR, rc);
277 return (PICL_FAILURE);
280 if ((rc = nvlist_pack(nvl, &pack_buf, &nvl_size, NV_ENCODE_NATIVE,
281 NULL)) != 0) {
282 nvlist_free(nvl);
283 free(evname);
284 syslog(LOG_ERR, WD_NVLIST_ERR, rc);
285 return (PICL_FAILURE);
288 if ((rc = ptree_post_event(evname, pack_buf, nvl_size,
289 event_completion_handler)) != PICL_SUCCESS) {
290 free(pack_buf);
291 free(evname);
292 nvlist_free(nvl);
293 return (rc);
295 nvlist_free(nvl);
296 return (PICL_SUCCESS);
300 * Updates the State value in picl tree and posts a state-change event
302 static void
303 wd_picl_update_state(int level, uint8_t stat)
305 picl_errno_t rc;
306 char state[PICL_PROPNAMELEN_MAX];
308 switch (stat) {
309 case WD_ARMED:
310 (void) strncpy(state, PICL_PROPVAL_WD_STATE_ARMED,
311 sizeof (state));
312 break;
313 case WD_DISARMED:
314 (void) strncpy(state, PICL_PROPVAL_WD_STATE_DISARMED,
315 sizeof (state));
316 break;
317 case WD_EXPIRED:
318 (void) strncpy(state, PICL_PROPVAL_WD_STATE_EXPIRED,
319 sizeof (state));
320 break;
321 default:
322 return;
325 (void) mutex_lock(&data_lock);
326 switch (level) {
327 case WD1:
328 wd_data.wd1_run_state = stat;
329 break;
330 case WD2:
331 wd_data.wd2_run_state = stat;
332 break;
333 case WD1_2:
334 wd_data.wd1_run_state = stat;
335 wd_data.wd2_run_state = stat;
336 break;
337 default:
338 return;
340 (void) mutex_unlock(&data_lock);
342 if (!state_configured) {
343 return;
346 switch (level) {
347 case WD1:
348 if ((rc = post_wd_state_event(wd_data.wd1_nodehdl,
349 state)) != PICL_SUCCESS) {
350 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
352 break;
353 case WD2:
354 if ((rc = post_wd_state_event(wd_data.wd2_nodehdl,
355 state)) != PICL_SUCCESS) {
356 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
358 break;
360 case WD1_2:
361 if ((rc = post_wd_state_event(wd_data.wd1_nodehdl,
362 state)) != PICL_SUCCESS) {
363 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
365 if ((rc = post_wd_state_event(wd_data.wd2_nodehdl,
366 state)) != PICL_SUCCESS) {
367 syslog(LOG_ERR, WD_PICL_POST_EVENT_ERR, rc);
369 break;
374 * Sends a command to SMC to reset the watchdog-timers
376 static int
377 wd_pat()
379 int rc = 0;
380 static uint8_t seq = 1;
381 sc_reqmsg_t req_pkt;
382 sc_rspmsg_t rsp_pkt;
384 if (seq < WD_MAX_SEQN) {
385 req_pkt.hdr.msg_id = seq++;
386 } else {
387 seq = 1;
388 req_pkt.hdr.msg_id = seq;
391 if (wd_debug & WD_TIME_DEBUG) {
392 start1 = gethrtime();
395 /* initialize the request packet */
396 (void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER,
397 DEFAULT_SEQN, 0);
399 /* make a call to smc library to send cmd */
400 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
401 WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
402 syslog(LOG_CRIT, WD_PICL_PAT_ERR, rc);
403 return (PICL_FAILURE);
406 if (wd_debug & WD_TIME_DEBUG) {
407 end1 = gethrtime();
408 time1[count].res_seq = SC_MSG_ID(&rsp_pkt);
409 time1[count].req_seq = SC_MSG_ID(&req_pkt);
410 time1[count].time = (end1 - start1);
412 if (count < (NUMBER_OF_READINGS - 1)) {
413 count++;
414 } else {
415 count = 0;
418 return (PICL_SUCCESS);
421 /* used to set the new values for watchdog and start the watchdog */
422 static int
423 wd_start(uchar_t action_1, uchar_t action_2,
424 uchar_t timeout_2, uchar_t *timeout_1, uint8_t patting_option)
426 int rc = 0;
427 sc_reqmsg_t req_pkt;
428 sc_rspmsg_t rsp_pkt;
430 if (timeout_1 == NULL) {
431 return (PICL_FAILURE);
434 req_pkt.data[0] = WD_USEFLAG_OS;
435 req_pkt.data[1] = action_1 | action_2; /* actions */
436 req_pkt.data[2] = timeout_2; /* wd timeout 2 */
437 req_pkt.data[3] = WD_XPR_FLG_CLR_OS; /* expiration flags */
438 req_pkt.data[4] = timeout_1[1]; /* LSB for wd timeout 1 */
439 req_pkt.data[5] = timeout_1[0]; /* MSB for wd timeout 1 */
441 if (patting_option == ENABLE_AUTO_PAT) {
442 req_pkt.data[0] |= WD_ENABLE_AUTO_PAT;
445 /* initialize the request packet */
446 (void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER,
447 DEFAULT_SEQN, WD_SET_CMD_DATA_LEN);
449 /* make a call to smc library to send cmd */
450 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
451 WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
452 WD_DEBUG1(WD_PICL_START_ERR, rc);
453 return (PICL_FAILURE);
456 /* reset the watchdog timer */
457 (void) smc_init_smc_msg(&req_pkt, SMC_RESET_WATCHDOG_TIMER,
458 DEFAULT_SEQN, 0);
459 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
460 WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
461 WD_DEBUG1(WD_PICL_START_ERR, rc);
462 return (PICL_FAILURE);
464 return (PICL_SUCCESS);
468 * Validates timeout and action fields and arms the watchdog-timers
470 static int
471 wd_arm(uint8_t patting_option)
473 int rc;
474 uint16_t wd_time1;
475 uint8_t wd_time2, wd1_action, wd2_action;
476 uint8_t timeout1[2];
478 if (wd_data.wd1_timeout >= 0) {
479 wd_time1 = wd_data.wd1_timeout/WD_L1_RESOLUTION;
480 } else {
481 wd_time1 = 0;
484 if (wd_data.wd2_timeout >= 0) {
485 wd_time2 = wd_data.wd2_timeout/WD_L2_RESOLUTION;
486 } else {
487 wd_time2 = 0;
490 timeout1[0] = wd_time1 >> 8; /* MSB */
491 timeout1[1] = wd_time1 & 0x00ff; /* LSB */
493 /* check the HELATHY# status if action is alarm */
494 if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST ||
495 wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) {
496 rc = wd_get_healthy_status();
497 if (rc == WD_HEALTHY_DOWN) {
498 WD_DEBUG0(WD_HEALTHY_ERR);
499 return (PICL_FAILURE);
500 } else if (rc == -1) {
501 syslog(LOG_ERR, WD_GET_HEALTH_ERR);
502 return (PICL_FAILURE);
506 if (wd_data.wd1_timeout == -1) {
507 wd1_action = WD_ACTION_NONE2;
508 } else {
509 wd1_action = wd_data.wd1_action;
512 if (wd_data.wd2_timeout == -1) {
513 wd2_action = WD_ACTION_NONE2;
514 } else {
515 wd2_action = wd_data.wd2_action;
518 rc = wd_start(wd1_action, wd2_action,
519 wd_time2, timeout1, patting_option);
520 return (rc);
524 * This is thread is a RealTime class thread. This thread pats the
525 * watchdog-timers in regular intervals before the expiry.
527 /*ARGSUSED*/
528 static void *
529 wd_patting_thread(void *args)
531 time_t sec;
532 pcinfo_t pci;
533 long nano_sec;
534 timestruc_t to;
535 long sleep_time;
536 struct timeval tp;
537 int err, state;
539 for (;;) {
540 (void) mutex_lock(&patting_lock);
541 while (wd_data.wd_pat_state == WD_NORESET) {
542 (void) cond_wait(&patting_cv, &patting_lock);
544 (void) mutex_unlock(&patting_lock);
546 /* reset pat-time to zero */
547 pat_time = 0; /* tunable */
548 wd_parse_config_file(wd_conf);
550 if (wd_pat_thr_priority < 0 || wd_pat_thr_priority > 59) {
551 wd_pat_thr_priority = WD_DEFAULT_THREAD_PRIORITY;
554 /* change the priority of thread to realtime class */
555 (void) strncpy(pci.pc_clname, "RT", sizeof (pci.pc_clname));
556 if (priocntl(P_LWPID, P_MYID, PC_GETCID, (caddr_t)&pci) != -1) {
557 pcparms_t pcp;
558 rtparms_t *rtp = (rtparms_t *)pcp.pc_clparms;
559 rtp->rt_pri = wd_pat_thr_priority;
560 rtp->rt_tqsecs = 0;
561 rtp->rt_tqnsecs = RT_TQDEF;
562 pcp.pc_cid = pci.pc_cid;
563 if (priocntl(P_LWPID, P_MYID, PC_SETPARMS,
564 (caddr_t)&pcp) != 0) {
565 syslog(LOG_ERR, WD_PICL_RT_THRD_FAIL);
567 } else {
568 syslog(LOG_ERR, WD_PICL_RT_THRD_NO_PERM_ERR);
571 switch (wd_data.wd1_timeout) {
572 case 0:
573 if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) {
574 wd_picl_update_state(WD1, WD_ARMED);
575 if (wd_data.wd2_timeout >= 0) {
576 wd_picl_update_state(WD2, WD_ARMED);
578 } else {
579 syslog(LOG_ERR, WD_PICL_START_ERR,
580 PICL_FAILURE);
582 /* no need to pat */
583 (void) mutex_lock(&patting_lock);
584 wd_data.wd_pat_state = WD_NORESET;
585 (void) mutex_unlock(&patting_lock);
586 continue;
587 case -1:
588 if (wd_data.wd2_timeout < 0) {
589 (void) mutex_lock(&patting_lock);
590 wd_data.wd_pat_state = WD_NORESET;
591 (void) mutex_unlock(&patting_lock);
592 continue;
594 if (wd_arm(DISABLE_AUTO_PAT) == PICL_SUCCESS) {
595 wd_picl_update_state(WD2, WD_ARMED);
596 } else {
597 syslog(LOG_ERR, WD_PICL_START_ERR,
598 PICL_FAILURE);
600 /* no need to pat */
601 (void) mutex_lock(&patting_lock);
602 wd_data.wd_pat_state = WD_NORESET;
603 (void) mutex_unlock(&patting_lock);
604 continue;
605 default:
606 break;
609 if (pat_time == 0) {
610 if (wd_data.wd1_timeout > WD_PAT_TIME) {
611 pat_time = WD_PAT_TIME;
612 } else {
613 pat_time = wd_data.wd1_timeout - 80;
616 if (pat_time <= 0) {
617 WD_DEBUG0(WD_PICL_PAT_TIME_ERR);
618 (void) mutex_lock(&patting_lock);
619 wd_data.wd_pat_state = WD_NORESET;
620 (void) mutex_unlock(&patting_lock);
621 continue;
623 sleep_time = wd_data.wd1_timeout - pat_time;
625 if (wd_data.wd1_timeout <= 0 || sleep_time <= 0) {
626 WD_DEBUG0(WD_PICL_ARM_PAT_ERR);
627 (void) mutex_lock(&patting_lock);
628 wd_data.wd_pat_state = WD_NORESET;
629 (void) mutex_unlock(&patting_lock);
630 continue;
631 } else {
632 wd_picl_update_state(WD1, WD_ARMED);
635 if (wd_data.wd2_timeout >= 0) {
636 wd_picl_update_state(WD2, WD_ARMED);
639 sec = sleep_time/1000;
640 nano_sec = (sleep_time - (sec * 1000)) * 1000000;
642 if (wd_arm(ENABLE_AUTO_PAT) != PICL_SUCCESS) {
643 wd_picl_update_state(WD1_2, WD_DISARMED);
644 (void) mutex_lock(&patting_lock);
645 wd_data.wd_pat_state = WD_NORESET;
646 (void) mutex_unlock(&patting_lock);
647 syslog(LOG_ERR, WD_PICL_START_ERR, PICL_FAILURE);
648 continue;
651 do /* pat the watchdog until expiry or user disarm */
653 (void) mutex_lock(&patting_lock);
654 state = wd_data.wd_pat_state;
655 if (state == WD_NORESET) {
656 (void) mutex_unlock(&patting_lock);
657 break;
659 (void) gettimeofday(&tp, NULL);
660 to.tv_sec = tp.tv_sec + sec;
661 if ((nano_sec + (tp.tv_usec * 1000)) >= 1000000000) {
662 to.tv_sec += 1;
663 to.tv_nsec = (nano_sec +
664 (tp.tv_usec * 1000)) - 1000000000;
665 } else {
666 to.tv_nsec = nano_sec + (tp.tv_usec * 1000);
669 err = cond_timedwait(&patting_cv, &patting_lock, &to);
670 (void) mutex_unlock(&patting_lock);
672 if (err == ETIME) { /* woke up from sleep */
673 (void) wd_pat();
675 } while (state == WD_RESET);
677 /*NOTREACHED*/
678 return (NULL);
682 * returns 0 if owner is not alive
683 * returns 1 if owner is alive
684 * returns -1 if there is no active owner
686 static int
687 is_owner_alive()
689 char strpid[50];
690 struct stat buf;
692 if (pid == -1) {
693 return (-1);
696 /* check if the file exists or not */
697 (void) snprintf(strpid, sizeof (pid), "/proc/%ld/status", pid);
698 errno = 0;
699 if (stat(strpid, &buf) == 0) {
700 return (1);
702 if (errno == ENOENT) {
703 return (0);
704 } else {
705 syslog(LOG_ERR, WD_GET_OWN_FAILED, errno);
707 return (-1);
711 * Sends a cmd to SMC to stop watchdog timers
713 static int
714 wd_stop()
716 int rc = 0;
717 sc_reqmsg_t req_pkt;
718 sc_rspmsg_t rsp_pkt;
719 uint8_t buffer[8];
721 if (wd_get_reg_dump(buffer) != 0) {
722 return (PICL_FAILURE);
724 /* clear the expiration flags */
725 buffer[3] = 0xff; /* expiration flags */
727 (void) memcpy(SC_MSG_DATA(&req_pkt), buffer,
728 WD_SET_CMD_DATA_LEN);
730 /* initialize the request packet */
731 (void) smc_init_smc_msg(&req_pkt, SMC_SET_WATCHDOG_TIMER,
732 DEFAULT_SEQN, WD_SET_CMD_DATA_LEN);
734 /* make a call to smc library to send cmd */
735 if ((rc = smc_send_msg(wd_fd, &req_pkt, &rsp_pkt,
736 WD_POLL_TIMEOUT)) != SMC_SUCCESS) {
737 syslog(LOG_ERR, WD_PICL_STOP_ERR, rc);
738 return (PICL_FAILURE);
740 return (PICL_SUCCESS);
744 * Function used by volatile callback function for wd-op property
745 * under controller. This is used to arm, disarm the watchdog-timers
746 * in response to user actions
748 static int
749 wd_worker_function(uint8_t flag, pid_t proc_id)
751 int rc = PICL_SUCCESS;
752 int wd1_state, wd2_state;
754 (void) mutex_lock(&data_lock);
755 wd1_state = wd_data.wd1_run_state;
756 wd2_state = wd_data.wd2_run_state;
757 (void) mutex_unlock(&data_lock);
759 switch (flag) {
761 case USER_ARMED_WD:
763 /* watchdog can only be armed if all the timers are disarmed */
764 if (wd1_state != WD_DISARMED) {
765 WD_DEBUG0(WD_PICL_WD1_RUNNING_ERR);
766 rc = PICL_FAILURE;
767 break;
769 if (wd2_state != WD_DISARMED) {
770 WD_DEBUG0(WD_PICL_WD2_RUNNING_ERR);
771 rc = PICL_FAILURE;
772 break;
775 /* check the HELATHY# status if action is alarm */
776 if (wd_data.wd1_timeout >= 0) {
777 if (wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_HOST ||
778 wd_data.wd1_action == WD_ACTION_HEALTHY_DOWN_SAT) {
779 rc = wd_get_healthy_status();
780 if (rc == WD_HEALTHY_DOWN) {
781 WD_DEBUG0(WD_HEALTHY_ERR);
782 return (PICL_FAILURE);
783 } else if (rc == -1) {
784 syslog(LOG_ERR, WD_GET_HEALTH_ERR);
785 return (PICL_FAILURE);
786 } else {
787 rc = PICL_SUCCESS;
792 /* signal the patting thread */
793 (void) mutex_lock(&patting_lock);
794 wd_data.wd_pat_state = WD_RESET;
795 (void) cond_signal(&patting_cv);
796 (void) mutex_unlock(&patting_lock);
797 break;
799 case USER_DISARMED_WD:
802 * if the caller doesnot own watchdog services,
803 * check to see if the owner is still alive using procfs
805 if (proc_id != pid) {
806 switch (is_owner_alive()) {
807 case -1:
808 if ((wd1_state != WD_DISARMED) ||
809 (wd2_state != WD_DISARMED)) {
810 break;
812 /* watchdog is already disarmed */
813 WD_DEBUG0(WD_PICL_NO_WD_ERR);
814 return (PICL_FAILURE);
815 case 1:
816 /* owner is still alive, deny the operation */
817 WD_DEBUG0(WD_PICL_PERM_DENIED);
818 return (PICL_PERMDENIED);
819 default:
820 break;
824 /* watchdog is running */
825 if ((rc = wd_stop()) == PICL_SUCCESS) {
826 wd_picl_update_state(WD1_2, WD_DISARMED);
827 (void) mutex_lock(&patting_lock);
828 wd_data.wd_pat_state = WD_NORESET;
829 (void) cond_signal(&patting_cv);
830 (void) mutex_unlock(&patting_lock);
832 break;
834 case USER_ARMED_PAT_WD: /* for debug purposes only */
837 * first arm-pat operation is used for arming the watchdog
838 * subsequent arm-pat operations will be used for patting
839 * the watchdog
841 /* WD is stopped */
842 if (wd1_state == WD_DISARMED && wd2_state == WD_DISARMED) {
843 if ((rc = wd_arm(DISABLE_AUTO_PAT)) == PICL_SUCCESS) {
844 if (wd_data.wd1_timeout >= 0) {
845 wd_picl_update_state(WD1, WD_ARMED);
848 if (wd_data.wd2_timeout >= 0) {
849 wd_picl_update_state(WD2, WD_ARMED);
851 } else {
852 return (rc);
854 } else { /* WD is running */
855 if (wd1_state != WD_ARMED) {
856 WD_DEBUG0(WD_PICL_NO_WD_ERR);
857 return (PICL_INVALIDARG);
860 /* check if OS is patting the watchdog or not */
861 (void) mutex_lock(&patting_lock);
862 if (wd_data.wd_pat_state == WD_RESET) {
863 WD_DEBUG0(WD_PICL_TRY_PAT_ERR);
864 (void) mutex_unlock(&patting_lock);
865 return (PICL_INVALIDARG);
868 /* check if the process owns the WD services */
869 if (proc_id != pid) {
870 WD_DEBUG0(WD_PICL_PERM_DENIED);
871 return (PICL_PERMDENIED);
873 rc = wd_pat();
875 break;
877 default:
878 rc = PICL_INVALIDARG;
879 break;
881 } /* switch */
883 return (rc);
886 /*ARGSUSED*/
887 static int
888 wd_write_op(ptree_warg_t *parg, const void *buf)
890 int rc = PICL_INVALIDARG;
891 uint8_t flag;
893 /* only after state is configured */
894 if (!state_configured) {
895 if (parg->cred.dc_pid != getpid()) {
896 WD_DEBUG0(WD_PICL_STATE_INVALID);
897 return (PICL_PERMDENIED);
901 /* only super user can write this property */
902 if (parg->cred.dc_euid != SUPER_USER) {
903 WD_DEBUG0(WD_NO_ROOT_PERM);
904 return (PICL_PERMDENIED);
907 if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_ARM) == 0) {
908 flag = USER_ARMED_WD;
909 rc = PICL_SUCCESS;
912 if (strcmp((char *)buf, PICL_PROPVAL_WD_OP_DISARM) == 0) {
913 flag = USER_DISARMED_WD;
914 rc = PICL_SUCCESS;
917 /* for debug purpose only */
918 if (strcmp((char *)buf, WD_ARM_PAT) == 0) {
919 flag = USER_ARMED_PAT_WD;
920 rc = PICL_SUCCESS;
923 if (rc == PICL_SUCCESS) {
924 rc = wd_worker_function(flag, parg->cred.dc_pid);
925 } else {
926 rc = PICL_INVALIDARG;
929 if (rc == PICL_SUCCESS) {
931 switch (flag) {
932 case USER_ARMED_PAT_WD:
933 case USER_ARMED_WD:
935 /* get the process id of client */
936 if (parg->cred.dc_pid != getpid()) {
937 pid = parg->cred.dc_pid;
938 } else {
939 pid = -1;
941 break;
942 case USER_DISARMED_WD:
943 /* reset the pid */
944 pid = -1;
945 default:
946 break;
949 return (rc);
952 /* volatile call back function to read the watchdog L1 status */
953 /*ARGSUSED*/
954 static int
955 wd1_read_status(ptree_rarg_t *parg, void *buf)
957 int rc = PICL_SUCCESS;
959 (void) mutex_lock(&data_lock);
961 switch (wd_data.wd1_run_state) {
963 case WD_EXPIRED:
964 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED,
965 PICL_PROPNAMELEN_MAX);
966 break;
968 case WD_DISARMED:
969 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED,
970 PICL_PROPNAMELEN_MAX);
971 break;
973 case WD_ARMED:
974 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED,
975 PICL_PROPNAMELEN_MAX);
976 break;
978 default:
979 rc = PICL_FAILURE;
981 (void) mutex_unlock(&data_lock);
982 return (rc);
986 * this function is used to read the state of L2 timer
988 static int
989 wd_get_wd2_status(int *present_status)
991 int rc;
992 uchar_t buffer[WD_REGISTER_LEN];
994 bzero(buffer, WD_REGISTER_LEN);
995 (void) mutex_lock(&data_lock);
996 *present_status = wd_data.wd2_run_state;
997 if (wd_data.wd2_run_state != WD_ARMED) {
998 /* we already have the latest state */
999 (void) mutex_unlock(&data_lock);
1000 return (PICL_SUCCESS);
1002 (void) mutex_unlock(&data_lock);
1004 /* read watchdog registers */
1005 if ((rc = wd_get_reg_dump(buffer)) != 0) {
1006 return (rc);
1009 if (buffer[0] & WD_WD_RUNNING) {
1010 *present_status = WD_ARMED;
1011 return (PICL_SUCCESS);
1014 if (buffer[3] != 0) {
1015 (void) mutex_lock(&data_lock);
1016 *present_status = wd_data.wd2_run_state = WD_EXPIRED;
1017 (void) mutex_unlock(&data_lock);
1019 return (PICL_SUCCESS);
1022 /* volatile call back function to read the watchdog L2 status */
1023 /*ARGSUSED*/
1024 static int
1025 wd2_read_status(ptree_rarg_t *parg, void *buf)
1027 int present_status, rc;
1029 if ((rc = wd_get_wd2_status(&present_status)) !=
1030 PICL_SUCCESS) {
1031 return (rc);
1034 /* copy the present state in user buffer */
1035 switch (present_status) {
1036 case WD_ARMED:
1037 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_ARMED,
1038 PICL_PROPNAMELEN_MAX);
1039 break;
1040 case WD_EXPIRED:
1041 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_EXPIRED,
1042 PICL_PROPNAMELEN_MAX);
1043 break;
1044 case WD_DISARMED:
1045 (void) strncpy((char *)buf, PICL_PROPVAL_WD_STATE_DISARMED,
1046 PICL_PROPNAMELEN_MAX);
1047 break;
1049 return (PICL_SUCCESS);
1052 /* this thread listens for watchdog expiry events */
1053 /*ARGSUSED*/
1054 static void *
1055 wd_polling(void *args)
1057 uint8_t stat;
1058 int poll_retval;
1059 struct pollfd fds;
1060 sc_rspmsg_t rsp_pkt;
1061 int i;
1063 fds.fd = polling_fd;
1064 fds.events = POLLIN | POLLPRI;
1065 fds.revents = 0;
1067 for (;;) {
1068 poll_retval = poll(&fds, 1, -1);
1069 if (props_created == 0)
1070 continue;
1071 switch (poll_retval) {
1072 case 0:
1073 break;
1075 case -1:
1076 syslog(LOG_ERR, WD_PICL_POLL_ERR);
1077 break;
1079 default:
1080 /* something happened */
1081 if ((read(polling_fd, &rsp_pkt,
1082 sizeof (sc_rspmsg_t))) < 0) {
1083 syslog(LOG_ERR, WD_PICL_SMC_READ_ERR);
1084 break;
1087 if (rsp_pkt.hdr.cmd == SMC_EXPIRED_WATCHDOG_NOTIF) {
1089 (void) mutex_lock(&data_lock);
1090 stat = wd_data.wd1_run_state;
1091 (void) mutex_unlock(&data_lock);
1093 if (stat != WD_ARMED) {
1094 continue;
1097 wd_picl_update_state(WD1, WD_EXPIRED);
1099 (void) mutex_lock(&patting_lock);
1100 wd_data.wd_pat_state = WD_NORESET;
1101 (void) cond_signal(&patting_cv);
1103 (void) mutex_unlock(&patting_lock);
1104 syslog(LOG_WARNING, WD_WD1_EXPIRED);
1105 if (wd_debug & WD_TIME_DEBUG) {
1106 syslog(LOG_ERR, " latest count : %d", count);
1107 for (i = 0; i < NUMBER_OF_READINGS; i++) {
1108 syslog(LOG_ERR, "i = %d, req_seq = %d,"
1109 "res_seq = %d, time = %lld nsec",
1110 i, time1[i].req_seq,
1111 time1[i].res_seq,
1112 time1[i].time);
1115 if (wd_data.reboot_action) {
1116 wd_data.reboot_action = 0;
1117 (void) system(SHUTDOWN_CMD);
1120 break;
1122 } /* switch */
1124 /*NOTREACHED*/
1125 return (NULL);
1129 * This function reads the hardware state and gets the status of
1130 * watchdog-timers
1132 static int
1133 wd_get_status(wd_state_t *state)
1135 picl_errno_t rc;
1136 uchar_t buffer[WD_REGISTER_LEN];
1138 bzero(buffer, WD_REGISTER_LEN);
1139 /* read watchdog registers */
1140 if ((rc = wd_get_reg_dump(buffer)) != 0) {
1141 return (rc);
1144 /* get action */
1145 state->action1 = buffer[1] & 0xF0; /* most significant 4 bits */
1146 if (state->action1 == 0x0) {
1147 state->action1 = WD_ACTION_NONE1;
1149 state->action2 = buffer[1] & 0x0F; /* least significant 4 bits */
1150 if (state->action2 == 0x0) {
1151 state->action2 = WD_ACTION_NONE2;
1154 state->timeout2 = buffer[2];
1155 state->timeout1[0] = buffer[5]; /* MSB */
1156 state->timeout1[1] = buffer[4]; /* LSB */
1158 state->present_t1[0] = buffer[7]; /* MSB */
1159 state->present_t1[1] = buffer[6]; /* LSB */
1161 if (buffer[0] & WD_WD_RUNNING) {
1162 state->present_state = WD_ARMED;
1163 return (PICL_SUCCESS);
1166 if (buffer[3] != 0) {
1167 state->present_state = WD_EXPIRED;
1168 return (PICL_SUCCESS);
1169 } else {
1170 state->present_state = WD_DISARMED;
1171 return (PICL_SUCCESS);
1175 /* read the smc hardware and intialize the internal state */
1176 static void
1177 wd_set_init_state()
1179 wd_state_t state;
1180 uint16_t tmp1, tmp2, wd_time1;
1182 if (wd_get_status(&state) != PICL_SUCCESS) {
1183 syslog(LOG_ERR, WD_PICL_GET_STAT_ERR);
1184 /* defualt state is expired ??? */
1185 state.present_state = WD_EXPIRED;
1188 switch (state.present_state) {
1189 case WD_EXPIRED:
1190 case WD_DISARMED:
1191 if (state.present_state == WD_EXPIRED)
1192 wd_picl_update_state(WD1_2, WD_EXPIRED);
1193 else
1194 wd_picl_update_state(WD1_2, WD_DISARMED);
1195 wd_data.wd_pat_state = WD_NORESET;
1196 wd_data.wd1_action = state.action1;
1197 wd_data.wd2_action = state.action2;
1198 tmp1 = state.timeout1[0] << 8;
1199 tmp2 = state.timeout1[1];
1200 wd_time1 = tmp1 | tmp2;
1201 wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION;
1202 wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION;
1203 break;
1204 case WD_ARMED:
1206 * get the present values and restart the
1207 * watchdog from os level and continue to pat
1209 wd_picl_update_state(WD1_2, WD_ARMED);
1210 wd_data.wd_pat_state = WD_RESET;
1211 wd_data.wd1_action = (state.action1 << 4);
1212 wd_data.wd2_action = state.action2;
1214 tmp1 = state.timeout1[0] << 8;
1215 tmp2 = state.timeout1[1];
1216 wd_time1 = tmp1 | tmp2;
1217 wd_data.wd1_timeout = wd_time1 * WD_L1_RESOLUTION;
1218 wd_data.wd2_timeout = state.timeout2 * WD_L2_RESOLUTION;
1219 (void) wd_stop();
1224 * wrapper for ptree interface to create property
1226 static int
1227 wd_create_property(
1228 int ptype, /* PICL property type */
1229 int pmode, /* PICL access mode */
1230 size_t psize, /* size of PICL property */
1231 char *pname, /* property name */
1232 int (*readfn)(ptree_rarg_t *, void *),
1233 int (*writefn)(ptree_warg_t *, const void *),
1234 picl_nodehdl_t nodeh, /* node for property */
1235 picl_prophdl_t *propp, /* pointer to prop_handle */
1236 void *vbuf) /* initial value */
1238 picl_errno_t rc;
1239 ptree_propinfo_t propinfo;
1241 rc = ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
1242 ptype, pmode, psize, pname, readfn, writefn);
1243 if (rc != PICL_SUCCESS) {
1244 syslog(LOG_ERR, WD_PICL_PROP_INIT_ERR, rc);
1245 return (rc);
1248 rc = ptree_create_and_add_prop(nodeh, &propinfo, vbuf, propp);
1249 if (rc != PICL_SUCCESS) {
1250 return (rc);
1253 return (PICL_SUCCESS);
1256 /* Create and add Watchdog properties */
1257 static void
1258 wd_create_add_props()
1260 int rc;
1261 picl_nodehdl_t rooth, sysmgmt_h, platformh;
1262 int32_t timeout1 = 0;
1263 int32_t timeout2 = 0;
1264 char buf[PICL_WD_PROPVAL_MAX];
1266 /* get picl root node handle */
1267 if ((rc = ptree_get_root(&rooth)) != PICL_SUCCESS) {
1268 syslog(LOG_ERR, WD_NODE_INIT_ERR, 1, rc);
1269 return;
1272 /* get picl platform node handle */
1273 if ((rc = ptree_get_node_by_path(PLATFORM_PATH,
1274 &platformh)) != PICL_SUCCESS) {
1275 syslog(LOG_ERR, WD_NODE_INIT_ERR, 2, rc);
1276 return;
1279 /* get the picl sysmgmt node handle */
1280 if ((rc = ptree_find_node(platformh, PICL_PROP_NAME,
1281 PICL_PTYPE_CHARSTRING,
1282 PICL_NODE_SYSMGMT, strlen(PICL_NODE_SYSMGMT),
1283 &sysmgmt_h)) != PICL_SUCCESS) {
1284 syslog(LOG_ERR, WD_NODE_INIT_ERR, 3, rc);
1285 return;
1288 /* start creating the watchdog nodes and properties */
1289 if ((rc = ptree_create_and_add_node(sysmgmt_h, PICL_NODE_WD_CONTROLLER,
1290 PICL_CLASS_WATCHDOG_CONTROLLER,
1291 &(wd_data.wd_ctrl_nodehdl))) != PICL_SUCCESS) {
1292 syslog(LOG_ERR, WD_NODE_INIT_ERR, 4, rc);
1293 return;
1296 /* Add wd-op property to watchdog controller node */
1297 (void) strncpy(buf, "", sizeof (buf));
1298 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1299 PICL_WRITE + PICL_VOLATILE,
1300 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_OPERATION,
1301 NULL, wd_write_op,
1302 wd_data.wd_ctrl_nodehdl,
1303 &(wd_data.wd_ops_hdl),
1304 (void *)buf)) != PICL_SUCCESS) {
1305 syslog(LOG_ERR, WD_NODE_INIT_ERR, 5, rc);
1306 return;
1309 /* create L1 node and add to controller */
1310 if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl,
1311 PICL_NODE_WD_L1, PICL_CLASS_WATCHDOG_TIMER,
1312 &(wd_data.wd1_nodehdl))) != PICL_SUCCESS) {
1313 syslog(LOG_ERR, WD_NODE_INIT_ERR, 6, rc);
1314 return;
1317 /* create L2 node and add to controller */
1318 if ((rc = ptree_create_and_add_node(wd_data.wd_ctrl_nodehdl,
1319 PICL_NODE_WD_L2, PICL_CLASS_WATCHDOG_TIMER,
1320 &(wd_data.wd2_nodehdl))) != PICL_SUCCESS) {
1321 syslog(LOG_ERR, WD_NODE_INIT_ERR, 7, rc);
1322 return;
1325 /* create watchdog properties */
1326 /* create state property here */
1327 (void) strncpy(buf, PICL_PROPVAL_WD_STATE_DISARMED,
1328 sizeof (buf));
1329 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1330 PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX,
1331 PICL_PROP_STATE, wd1_read_status, NULLWRITE,
1332 wd_data.wd1_nodehdl,
1333 &(wd_data.wd1_state_hdl), (void *)buf)) != PICL_SUCCESS) {
1334 syslog(LOG_ERR, WD_NODE_INIT_ERR, 8, rc);
1335 return;
1338 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1339 PICL_READ + PICL_VOLATILE, PICL_PROPNAMELEN_MAX,
1340 PICL_PROP_STATE, wd2_read_status, NULLWRITE,
1341 wd_data.wd2_nodehdl,
1342 &(wd_data.wd2_state_hdl), (void *)buf)) != PICL_SUCCESS) {
1343 syslog(LOG_ERR, WD_NODE_INIT_ERR, 9, rc);
1344 return;
1347 /* create timeout property here */
1348 if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT,
1349 PICL_READ + PICL_WRITE + PICL_VOLATILE,
1350 sizeof (timeout1), PICL_PROP_WATCHDOG_TIMEOUT,
1351 wd_read_timeout, wd_write_timeout, wd_data.wd1_nodehdl,
1352 &(wd_data.wd1_timeout_hdl), (void *)&(timeout1))) !=
1353 PICL_SUCCESS) {
1354 syslog(LOG_ERR, WD_NODE_INIT_ERR, 10, rc);
1355 return;
1358 if ((rc = wd_create_property(PICL_PTYPE_UNSIGNED_INT,
1359 PICL_READ + PICL_WRITE + PICL_VOLATILE,
1360 sizeof (wd_data.wd2_timeout), PICL_PROP_WATCHDOG_TIMEOUT,
1361 wd_read_timeout, wd_write_timeout, wd_data.wd2_nodehdl,
1362 &(wd_data.wd2_timeout_hdl), (void *)&(timeout2))) !=
1363 PICL_SUCCESS) {
1364 syslog(LOG_ERR, WD_NODE_INIT_ERR, 11, rc);
1365 return;
1368 /* create wd_action property here */
1369 (void) strncpy(buf, PICL_PROPVAL_WD_ACTION_NONE,
1370 sizeof (buf));
1371 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1372 PICL_READ + PICL_WRITE + PICL_VOLATILE,
1373 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION,
1374 wd_read_action, wd_write_action,
1375 wd_data.wd1_nodehdl, &(wd_data.wd1_action_hdl),
1376 (void *)buf)) != PICL_SUCCESS) {
1377 syslog(LOG_ERR, WD_NODE_INIT_ERR, 12, rc);
1378 return;
1381 if ((rc = wd_create_property(PICL_PTYPE_CHARSTRING,
1382 PICL_READ + PICL_WRITE + PICL_VOLATILE,
1383 PICL_PROPNAMELEN_MAX, PICL_PROP_WATCHDOG_ACTION,
1384 wd_read_action, wd_write_action,
1385 wd_data.wd2_nodehdl, &(wd_data.wd2_action_hdl),
1386 (void *)buf)) != PICL_SUCCESS) {
1387 syslog(LOG_ERR, WD_NODE_INIT_ERR, 13, rc);
1388 return;
/*
 * Send a STREAMS I_STR ioctl on fd.
 *
 * cmd is the command placed in the request, buf/len describe the data
 * buffer that travels with it, and the request timeout is fixed at 60.
 * Returns whatever ioctl() returns.
 */
static int
wd_ioctl(int fd, int cmd, int len, char *buf)
{
	struct strioctl req;

	req.ic_cmd = cmd;
	req.ic_timout = 60;
	req.ic_len = len;
	req.ic_dp = buf;

	return (ioctl(fd, I_STR, &req));
}
1405 static int
1406 wd_open(int attr)
1408 int cc;
1409 sc_cmdspec_t wd_cmdspec;
1411 if ((wd_fd = open(SMC_NODE, attr)) < 0) {
1412 return (-1);
1415 /* get exclusive access for set and reset commands of watchdog */
1416 wd_cmdspec.args[0] = SMC_SET_WATCHDOG_TIMER;
1417 wd_cmdspec.args[1] = SMC_RESET_WATCHDOG_TIMER;
1418 wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE;
1420 cc = wd_ioctl(wd_fd, SCIOC_MSG_SPEC, 3,
1421 (char *)&wd_cmdspec);
1422 if (cc < 0) {
1423 syslog(LOG_ERR, WD_PICL_EXCLUSIVE_ACCESS_ERR);
1424 return (-1);
1426 return (wd_fd);
1429 static int
1430 wd_open_pollfd(int attr)
1432 int cc;
1433 sc_cmdspec_t wd_cmdspec;
1435 if ((polling_fd = open(SMC_NODE, attr)) < 0) {
1436 return (-1);
1439 /* request for watchdog expiry notification */
1440 wd_cmdspec.args[0] = SMC_EXPIRED_WATCHDOG_NOTIF;
1441 wd_cmdspec.attribute = SC_ATTR_EXCLUSIVE;
1443 cc = wd_ioctl(polling_fd, SCIOC_MSG_SPEC, 2,
1444 (char *)&wd_cmdspec);
1445 if (cc < 0) {
1446 syslog(LOG_ERR, WD_PICL_SET_ATTR_FAILED);
1447 return (-1);
1449 return (polling_fd);
1452 /* read the ENVIRONMENT variables and initialize tunables */
1453 static void
1454 wd_get_env()
1456 char *val;
1457 int intval = 0;
1459 /* read frutree debug flag value */
1460 if (val = getenv(WATCHDOG_DEBUG)) {
1461 errno = 0;
1462 intval = strtol(val, (char **)NULL, 0);
1463 if (errno == 0) {
1464 wd_debug = intval;
1470 * PTREE Entry Points
1473 /* picl-state-change event handler */
1474 /*ARGSUSED*/
1475 static void
1476 wd_state_change_evhandler(const char *ename, const void *earg,
1477 size_t size, void *cookie)
1479 char *value;
1480 picl_errno_t rc;
1481 nvlist_t *nvlp;
1482 picl_nodehdl_t fruhdl;
1483 static int spawn_threads = 1;
1484 char name[PICL_PROPNAMELEN_MAX];
1486 if (strcmp(ename, PICLEVENT_STATE_CHANGE)) {
1487 return;
1490 /* neglect all events if wd props are already created */
1491 if (props_created && state_configured) {
1492 return;
1495 if (nvlist_unpack((char *)earg, size, &nvlp, NULL)) {
1496 return;
1498 if ((nvlist_lookup_uint64(nvlp, PICLEVENTARG_NODEHANDLE,
1499 &fruhdl)) == -1) {
1500 nvlist_free(nvlp);
1501 return;
1503 if (nvlist_lookup_string(nvlp, PICLEVENTARG_STATE, &value)) {
1504 nvlist_free(nvlp);
1505 return;
1508 rc = ptree_get_propval_by_name(fruhdl, PICL_PROP_NAME,
1509 (void *)name, sizeof (name));
1510 if (rc != PICL_SUCCESS) {
1511 nvlist_free(nvlp);
1512 return;
1515 /* care for only events on chassis node */
1516 if (strcmp(name, PICL_NODE_CHASSIS) != 0) {
1517 nvlist_free(nvlp);
1518 return;
1521 if (strcmp(value, PICLEVENTARGVAL_CONFIGURED) == 0) {
1522 state_configured = 1;
1523 nvlist_free(nvlp);
1524 return;
1527 if (strcmp(value, PICLEVENTARGVAL_CONFIGURING) != 0) {
1528 nvlist_free(nvlp);
1529 return;
1532 if (wd_fd < 0) {
1533 if ((wd_fd = wd_open(O_RDWR)) < 0) {
1534 syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR);
1535 nvlist_free(nvlp);
1536 return;
1540 if (polling_fd < 0) {
1541 if ((polling_fd = wd_open_pollfd(O_RDWR)) < 0) {
1542 syslog(LOG_CRIT, WD_PICL_SMC_OPEN_ERR);
1543 nvlist_free(nvlp);
1544 return;
1548 switch (wd_get_chassis_type()) {
1549 case WD_HOST: /* is host */
1550 wd_data.is_host = B_TRUE;
1551 break;
1552 case WD_STANDALONE: /* is satellite */
1553 wd_data.is_host = B_FALSE;
1554 break;
1555 default:
1556 nvlist_free(nvlp);
1557 return;
1560 (void) wd_create_add_props(); /* create and add properties */
1561 props_created = 1;
1563 /* read the hardware and initialize values */
1564 (void) wd_set_init_state();
1566 /* initialize wd-conf value */
1567 (void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s",
1568 PICL_CONFIG_DIR, WD_CONF_FILE);
1570 if (spawn_threads == 0) {
1571 /* threads are already created */
1572 nvlist_free(nvlp);
1573 return;
1576 /* start monitoring for the events */
1577 if (thr_create(NULL, NULL, wd_polling,
1578 NULL, THR_BOUND, &polling_thr_tid) != 0) {
1579 syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED,
1580 "polling");
1581 nvlist_free(nvlp);
1582 return;
1585 /* thread used to pat watchdog */
1586 if (thr_create(NULL, NULL, wd_patting_thread,
1587 NULL, THR_BOUND, &patting_thr_tid) != 0) {
1588 syslog(LOG_ERR, WD_PICL_THREAD_CREATE_FAILED,
1589 "patting");
1590 nvlist_free(nvlp);
1591 return;
1593 spawn_threads = 0;
1594 nvlist_free(nvlp);
1597 static void
1598 wd_picl_register(void)
1600 int rc = 0;
1601 if ((rc = picld_plugin_register(&wd_reg_info)) != PICL_SUCCESS) {
1602 syslog(LOG_ERR, WD_PICL_REG_ERR, rc);
1606 /* entry point (initialization) */
1607 static void
1608 wd_picl_init(void)
1610 /* initialize the wd_conf path and name */
1611 (void) snprintf(wd_conf, sizeof (wd_conf), "%s/%s",
1612 PICL_CONFIG_DIR, WD_CONF_FILE);
1614 /* parse configuration file and set tunables */
1615 wd_parse_config_file(wd_conf);
1617 /* if watchdog-enable is set to false dont intialize wd subsystem */
1618 if (wd_enable == 0) {
1619 return;
1622 /* read watchdog related environment variables */
1623 wd_get_env();
1625 /* event handler for state change notifications from frutree */
1626 (void) ptree_register_handler(PICLEVENT_STATE_CHANGE,
1627 wd_state_change_evhandler, NULL);
1630 static void
1631 wd_picl_fini(void)
1633 (void) ptree_unregister_handler(PICLEVENT_STATE_CHANGE,
1634 wd_state_change_evhandler, NULL);
1636 state_configured = 0; /* chassis state */
1637 props_created = 0;
1638 (void) ptree_delete_node(wd_data.wd_ctrl_nodehdl);
1639 (void) ptree_destroy_node(wd_data.wd_ctrl_nodehdl);
1643 * volatile function to read the timeout
1645 static int
1646 wd_read_timeout(ptree_rarg_t *parg, void *buf)
1648 /* update the buffer provided by user */
1649 (void) mutex_lock(&data_lock);
1650 if (parg->proph == wd_data.wd1_timeout_hdl) {
1651 *(int32_t *)buf = wd_data.wd1_timeout;
1652 } else if (parg->proph == wd_data.wd2_timeout_hdl) {
1653 *(int32_t *)buf = wd_data.wd2_timeout;
1655 (void) mutex_unlock(&data_lock);
1656 return (PICL_SUCCESS);
1660 * volatile function to read the action
1662 static int
1663 wd_read_action(ptree_rarg_t *parg, void *buf)
1665 (void) mutex_lock(&data_lock);
1666 if (parg->proph == wd_data.wd1_action_hdl) {
1667 switch (wd_data.wd1_action) {
1668 case WD_ACTION_HEALTHY_DOWN_HOST:
1669 case WD_ACTION_HEALTHY_DOWN_SAT:
1670 (void) strcpy((char *)buf,
1671 PICL_PROPVAL_WD_ACTION_ALARM);
1672 break;
1673 case WD_ACTION_NONE1:
1674 case WD_ACTION_NONE2:
1675 if (wd_data.reboot_action == 1) {
1676 (void) strcpy((char *)buf,
1677 PICL_PROPVAL_WD_ACTION_REBOOT);
1678 } else {
1679 (void) strcpy((char *)buf,
1680 PICL_PROPVAL_WD_ACTION_NONE);
1682 break;
1684 } else if (parg->proph == wd_data.wd2_action_hdl) {
1685 switch (wd_data.wd2_action) {
1686 case WD_ACTION_HARD_RESET:
1687 (void) strcpy((char *)buf,
1688 PICL_PROPVAL_WD_ACTION_RESET);
1689 break;
1690 case WD_ACTION_NONE2:
1691 (void) strcpy((char *)buf, PICL_PROPVAL_WD_ACTION_NONE);
1692 break;
1695 (void) mutex_unlock(&data_lock);
1696 return (PICL_SUCCESS);
1700 * volatile function to write the action
1701 * this function validates the user value before programming the
1702 * action property. Properties can be modified only when watchdog
1703 * is in disarmed state.
1705 static int
1706 wd_write_action(ptree_warg_t *parg, const void *buf)
1708 int flag = 0x0;
1709 picl_errno_t rc = PICL_SUCCESS;
1710 char wd_action[PICL_WD_PROPVAL_MAX];
1712 /* only super user can write this property */
1713 if (parg->cred.dc_euid != SUPER_USER) {
1714 return (PICL_PERMDENIED);
1717 if (parg->proph == wd_data.wd1_action_hdl) {
1718 flag = WD1;
1719 } else if (parg->proph == wd_data.wd2_action_hdl) {
1720 flag = WD2;
1723 /* dont allow any write operations when watchdog is armed */
1724 (void) mutex_lock(&data_lock);
1725 if (wd_data.wd1_run_state != WD_DISARMED ||
1726 wd_data.wd2_run_state != WD_DISARMED) {
1727 (void) mutex_unlock(&data_lock);
1728 return (PICL_PERMDENIED);
1731 /* validate the values and store in internal cache */
1732 (void) strcpy(wd_action, (char *)buf);
1733 switch (flag) {
1734 case WD1:
1735 if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_ALARM) == 0) {
1736 if (wd_data.is_host)
1737 wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_HOST;
1738 else
1739 wd_data.wd1_action = WD_ACTION_HEALTHY_DOWN_SAT;
1740 wd_data.reboot_action = 0;
1741 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) {
1742 wd_data.wd1_action = WD_ACTION_NONE1;
1743 wd_data.reboot_action = 0;
1744 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_REBOOT) == 0) {
1745 wd_data.wd1_action = WD_ACTION_NONE1;
1746 wd_data.reboot_action = 1;
1747 } else {
1748 rc = PICL_INVALIDARG;
1750 break;
1752 case WD2:
1753 if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_RESET) == 0) {
1754 wd_data.wd2_action = WD_ACTION_HARD_RESET;
1755 } else if (strcmp(wd_action, PICL_PROPVAL_WD_ACTION_NONE) == 0) {
1756 wd_data.wd2_action = WD_ACTION_NONE2;
1757 } else {
1758 rc = PICL_INVALIDARG;
1760 break;
1762 (void) mutex_unlock(&data_lock);
1763 return (rc);
1767 * volatile function to write the timeout
1768 * this function validates the user value before programming the
1769 * timeout property. Properties can be modified only when watchdog
1770 * is in disarmed state.
1772 static int
1773 wd_write_timeout(ptree_warg_t *parg, const void *buf)
1775 int32_t timeout;
1776 int flag = 0x0;
1778 /* only super user can write this property */
1779 if (parg->cred.dc_euid != SUPER_USER) {
1780 return (PICL_PERMDENIED);
1783 /* dont allow any write operations when watchdog is armed */
1784 (void) mutex_lock(&data_lock);
1785 if (wd_data.wd1_run_state != WD_DISARMED ||
1786 wd_data.wd2_run_state != WD_DISARMED) {
1787 (void) mutex_unlock(&data_lock);
1788 return (PICL_PERMDENIED);
1790 (void) mutex_unlock(&data_lock);
1792 if (parg->proph == wd_data.wd1_timeout_hdl) {
1793 flag = WD1;
1794 } else if (parg->proph == wd_data.wd2_timeout_hdl) {
1795 flag = WD2;
1798 /* validate the timeout values */
1799 timeout = *(int32_t *)buf;
1800 if (timeout < -1) {
1801 return (PICL_INVALIDARG);
1804 if (timeout > 0) {
1805 switch (flag) {
1806 case WD1:
1807 if ((timeout % WD_L1_RESOLUTION) != 0) {
1808 return (PICL_INVALIDARG);
1810 if ((timeout/WD_L1_RESOLUTION) > WD_MAX_L1) {
1811 return (PICL_INVALIDARG);
1813 break;
1814 case WD2:
1815 if ((timeout % WD_L2_RESOLUTION) != 0) {
1816 return (PICL_INVALIDARG);
1818 if ((timeout/WD_L2_RESOLUTION) > WD_MAX_L2) {
1819 /* 255 sec */
1820 return (PICL_INVALIDARG);
1825 /* update the internal cache */
1826 (void) mutex_lock(&data_lock);
1827 switch (flag) {
1828 case WD1:
1829 wd_data.wd1_timeout = timeout;
1830 break;
1831 case WD2:
1832 wd_data.wd2_timeout = timeout;
1833 break;
1835 (void) mutex_unlock(&data_lock);
1836 return (PICL_SUCCESS);