8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / rcm_daemon / common / rcm_lock.c
blob5f8c56ce866b3239f046cdc032a0b6a73d341618
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
22 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include "rcm_impl.h"
29 #include "rcm_module.h"
32 * Global locks
34 mutex_t rcm_req_lock; /* protects global dr & info request list */
37 * Daemon state file
39 static int state_fd;
40 #define RCM_STATE_FILE "/var/run/rcm_daemon_state"
41 #define N_REQ_CHUNK 10 /* grow 10 entries at a time */
44 * Daemon timeout value
46 #define RCM_DAEMON_TIMEOUT 300 /* 5 minutes idle time */
/*
 * Struct for a list of outstanding rcm requests
 */
typedef struct {
	int seq_num;			/* sequence number of request */
	int state;			/* current state */
	pid_t pid;			/* pid of initiator */
	uint_t flag;			/* request flags */
	int type;			/* resource(device) type */
	timespec_t interval;		/* suspend interval */
	char device[MAXPATHLEN];	/* name of device or resource */
} req_t;

/*
 * Growable request list.  For dr requests this structure is mmap'ed
 * from the daemon state file, so it must stay position-independent
 * (no pointers).  A slot is "free" when its state is RCM_STATE_REMOVE
 * or its device name is empty; removed slots leave holes, n_req only
 * counts slots in use.
 */
typedef struct {
	int n_req;		/* number of slots currently in use */
	int n_req_max;		/* number of req_t's to follow */
	int n_seq_max;		/* last sequence number */
	int idle_timeout;	/* persist idle timeout value */
	req_t req[1];		/* first slot of the trailing array */
	/* more req_t follows */
} req_list_t;
70 static req_list_t *dr_req_list;
71 static req_list_t *info_req_list;
73 static const char *locked_info = "DR operation in progress";
74 static const char *locked_err = "Resource is busy";
76 static int rcmd_get_state();
77 static void add_to_polling_list(pid_t);
78 static void remove_from_polling_list(pid_t);
80 void start_polling_thread();
81 static void stop_polling_thread();
/*
 * Initialize request lists required for locking.
 *
 * The info request list lives on the heap; the dr request list is
 * backed by RCM_STATE_FILE and mmap'ed MAP_SHARED so that daemon
 * state survives a daemon restart.  Any fatal setup error exits the
 * daemon via rcmd_exit().
 */
void
rcmd_lock_init(void)
{
	int size;
	struct stat fbuf;

	/*
	 * Start info list with one slot, then grow on demand.
	 */
	info_req_list = s_calloc(1, sizeof (req_list_t));
	info_req_list->n_req_max = 1;

	/*
	 * Open daemon state file and map in contents
	 */
	state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
	if (state_fd == -1) {
		rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	if (fstat(state_fd, &fbuf) != 0) {
		rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	/*
	 * A zero-length (freshly created) state file is extended to
	 * hold one empty req_list_t before it can be mapped.
	 */
	size = fbuf.st_size;
	if (size == 0) {
		size = sizeof (req_list_t);
		if (ftruncate(state_fd, size) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}
	}

	/*LINTED*/
	dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_SHARED, state_fd, 0);
	if (dr_req_list == MAP_FAILED) {
		rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));
		rcmd_exit(errno);
	}

	/*
	 * Initial size is one entry; n_req_max == 0 means the file was
	 * just created, so there is no previous state to recover.
	 */
	if (dr_req_list->n_req_max == 0) {
		dr_req_list->n_req_max = 1;
		(void) fsync(state_fd);
		return;
	}

	rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
	    dr_req_list->n_req, dr_req_list->n_req_max);

	/*
	 * Recover the daemon state left behind by a previous incarnation.
	 */
	clean_dr_list();
}
153 * Get a unique sequence number--to be called with rcm_req_lock held.
155 static int
156 get_seq_number()
158 int number;
160 if (dr_req_list == NULL)
161 return (0);
163 dr_req_list->n_seq_max++;
164 number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
165 (void) fsync(state_fd);
167 return (number);
/*
 * Find entry in list with the same resource name and sequence number.
 * If seq_num == -1, no seq_num matching is required.
 *
 * Returns a pointer into the list's own storage, or NULL when no
 * live entry matches.  Caller must hold the lock protecting 'list'.
 */
static req_t *
find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
{
	int i;

	/*
	 * Look for entry with the same resource and seq_num.
	 * Also match RCM_FILESYS field in flag.
	 */
	for (i = 0; i < list->n_req_max; i++) {
		if (list->req[i].state == RCM_STATE_REMOVE)
			/* stale entry */
			continue;

		/*
		 * We need to distiguish a file system root from the directory
		 * it is mounted on.
		 *
		 * Applications are not aware of any difference between the
		 * two, but the system keeps track of it internally by
		 * checking for mount points while traversing file path.
		 * In a similar spirit, RCM is keeping this difference as
		 * an implementation detail.
		 */
		if ((strcmp(device, list->req[i].device) != 0) ||
		    (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
			/* different resource */
			continue;

		/* compare base sequence numbers (cascade bits stripped) */
		if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
		    (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
			/* different base seqnum */
			continue;

		return (&list->req[i]);
	}

	return (NULL);
}
/*
 * Get the next empty req_t entry. If no entry exists, grow the list.
 *
 * The info list grows via realloc; the dr list grows by extending the
 * state file with ftruncate and re-mmap'ing it (the old mapping is not
 * unmapped here).  Caller must hold rcm_req_lock.  Never returns NULL:
 * allocation failures exit the daemon.
 */
static req_t *
get_req_entry(req_list_t **listp)
{
	int i;
	int n_req = (*listp)->n_req;
	int n_req_max = (*listp)->n_req_max;

	/*
	 * If the list is full, grow the list and return the first
	 * entry in the new portion.
	 */
	if (n_req == n_req_max) {
		int newsize;

		n_req_max += N_REQ_CHUNK;
		newsize = sizeof (req_list_t) + (n_req_max - 1) *
		    sizeof (req_t);

		if (listp == &info_req_list) {
			/* heap-backed list: simple realloc */
			*listp = s_realloc(*listp, newsize);
		} else if (ftruncate(state_fd, newsize) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		/*LINTED*/
		} else if ((*listp = (req_list_t *)mmap(NULL, newsize,
		    PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
		    MAP_FAILED) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot mmap %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
			rcmd_exit(errno);
		}

		/* Initialize the new entries */
		for (i = (*listp)->n_req_max; i < n_req_max; i++) {
			(*listp)->req[i].state = RCM_STATE_REMOVE;
			(void) strcpy((*listp)->req[i].device, "");
		}

		(*listp)->n_req_max = n_req_max;
		(*listp)->n_req++;
		return (&(*listp)->req[n_req]);
	}

	/*
	 * List contains empty slots, find it.
	 */
	for (i = 0; i < n_req_max; i++) {
		if (((*listp)->req[i].device[0] == '\0') ||
		    ((*listp)->req[i].state == RCM_STATE_REMOVE)) {
			break;
		}
	}

	assert(i < n_req_max);	/* empty slot must exist */

	(*listp)->n_req++;
	return (&(*listp)->req[i]);
}
279 * When one resource depends on multiple resources, it's possible that
280 * rcm_get_info can be called multiple times on the resource, resulting
281 * in duplicate information. By assigning a unique sequence number to
282 * each rcm_get_info operation, this duplication can be eliminated.
284 * Insert a dr entry in info_req_list
287 info_req_add(char *rsrcname, uint_t flag, int seq_num)
289 int error = 0;
290 char *device;
291 req_t *req;
293 rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",
294 rsrcname, seq_num);
296 device = resolve_name(rsrcname);
297 (void) mutex_lock(&rcm_req_lock);
300 * Look for entry with the same resource and seq_num.
301 * If it exists, we return an error so that such
302 * information is not gathered more than once.
304 if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
305 rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d \n",
306 device, seq_num);
307 error = -1;
308 goto out;
312 * Get empty entry and fill in seq_num and device.
314 req = get_req_entry(&info_req_list);
315 req->seq_num = seq_num;
316 req->state = RCM_STATE_ONLINE; /* mark that the entry is in use */
317 req->flag = flag;
318 (void) strcpy(req->device, device);
320 out:
321 (void) mutex_unlock(&rcm_req_lock);
322 free(device);
324 return (error);
328 * Remove all entries associated with seq_num from info_req_list
330 void
331 info_req_remove(int seq_num)
333 int i;
335 rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);
337 seq_num >>= SEQ_NUM_SHIFT;
338 (void) mutex_lock(&rcm_req_lock);
340 /* remove all entries with seq_num */
341 for (i = 0; i < info_req_list->n_req_max; i++) {
342 if (info_req_list->req[i].state == RCM_STATE_REMOVE)
343 continue;
345 if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num)
346 continue;
348 info_req_list->req[i].state = RCM_STATE_REMOVE;
349 info_req_list->n_req--;
353 * We don't shrink the info_req_list size for now.
355 (void) mutex_unlock(&rcm_req_lock);
359 * Checking lock conflicts. There is a conflict if:
360 * - attempt to DR a node when either its ancester or descendent
361 * is in the process of DR
362 * - attempt to register for a node when its ancester is locked for DR
364 static int
365 check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
367 int i, ret = RCM_SUCCESS;
369 if (info)
370 *info = NULL;
373 * During daemon initialization, don't check locks
375 if (dr_req_list == NULL)
376 return (ret);
378 for (i = 0; i < dr_req_list->n_req; i++) {
379 req_t *req = &dr_req_list->req[i];
380 char *dr_dev = req->device;
383 * Skip empty entries
385 if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))
386 continue;
389 * Make sure that none of the ancestors of dr_dev is
390 * being operated upon.
392 if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
394 * An exception to this is the filesystem.
395 * We should allowed a filesystem rooted at a
396 * child directory to be unmounted.
398 if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
399 ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))
400 continue;
402 assert(info != 0);
404 add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
405 dr_req_list->req[i].state,
406 dr_req_list->req[i].seq_num, NULL, locked_info,
407 locked_err, NULL, info);
408 ret = RCM_CONFLICT;
409 break;
412 if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
414 * Check descendents only for DR request.
416 * Could have multiple descendents doing DR,
417 * we want to find them all.
419 assert(info != 0);
421 add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
422 dr_req_list->req[i].state,
423 dr_req_list->req[i].seq_num, NULL, locked_info,
424 locked_err, NULL, info);
425 ret = RCM_CONFLICT;
426 /* don't break here, need to find all conflicts */
430 return (ret);
434 * Check for lock conflicts for DR operation or client registration
437 rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
438 rcm_info_t **info)
440 int result;
441 char *device;
443 device = resolve_name(rsrcname);
444 result = check_lock(device, flag, cflag, info);
445 free(device);
447 return (result);
450 static int
451 transition_state(int state)
454 * If the resource state is in transition, ask caller to
455 * try again.
457 switch (state) {
458 case RCM_STATE_OFFLINING:
459 case RCM_STATE_SUSPENDING:
460 case RCM_STATE_RESUMING:
461 case RCM_STATE_ONLINING:
462 case RCM_STATE_REMOVING:
464 return (1);
466 default:
467 /*FALLTHROUGH*/
468 break;
470 return (0);
/*
 * Update a dr entry in dr_req_list.
 *
 * Validates that the requested state change is a legal transition from
 * the entry's current state, then commits it and syncs the state file.
 *
 * Returns:
 *	RCM_SUCCESS  - entry updated
 *	RCM_FAILURE  - no matching entry exists (caller may add one)
 *	RCM_CONFLICT - invalid transition or initiator pid mismatch
 *	EAGAIN       - entry is in a transition state; caller should retry
 * On conflict the busy resource is appended to *infop (if non-NULL).
 * Caller must hold rcm_req_lock.
 */
/*ARGSUSED*/
static int
dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
    int seq_num, timespec_t *interval, rcm_info_t **infop)
{
	req_t *req;

	/*
	 * Find request entry. If not found, return RCM_FAILURE
	 */
	req = find_req_entry(device, flag, -1, dr_req_list);

	if (req == NULL) {
		switch (state) {
		case RCM_STATE_OFFLINE_QUERYING:
		case RCM_STATE_SUSPEND_QUERYING:
		case RCM_STATE_OFFLINING:
		case RCM_STATE_SUSPENDING:
			/* could be re-do operation, no error message */
			break;

		default:
			rcm_log_message(RCM_DEBUG,
			    "update non-existing resource %s\n", device);
		}
		return (RCM_FAILURE);
	}

	/*
	 * During initialization, update is unconditional (forced)
	 * in order to bring the daemon up in a sane state.
	 */
	if (rcmd_get_state() == RCMD_INIT)
		goto update;

	/*
	 * Don't allow update with mismatched initiator pid. This could happen
	 * as part of normal operation.
	 */
	if (pid != req->pid) {
		rcm_log_message(RCM_INFO,
		    gettext("mismatched dr initiator pid: %ld %ld\n"),
		    req->pid, pid);
		goto failure;
	}

	rcm_log_message(RCM_TRACE4,
	    "dr_req_update_entry: state=%d, device=%s\n",
	    req->state, req->device);

	/*
	 * Check that the state transition is valid
	 */
	switch (state) {
	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_OFFLINING:
		/*
		 * This is the case of re-offlining, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid offlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_SUSPENDING:
		/*
		 * This is the case of re-suspending, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid suspending from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_RESUMING:
		/* resuming is only valid from a suspended/failed state */
		if ((req->state != RCM_STATE_SUSPEND) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_FAIL)) {
			rcm_log_message(RCM_DEBUG,
			    "%s: invalid resuming from state %d\n",
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_ONLINING:
		/* onlining is only valid from an offlined/failed state */
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid onlining from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_REMOVING:
		/* removal requires a successful or failed offline first */
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid removing from state %d\n"),
			    device, req->state);
			goto failure;
		}
		break;

	case RCM_STATE_SUSPEND_FAIL:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE_FAIL:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_SUSPEND:
		assert(req->state == RCM_STATE_SUSPENDING);
		break;

	case RCM_STATE_OFFLINE:
		assert(req->state == RCM_STATE_OFFLINING);
		break;

	case RCM_STATE_ONLINE:
		assert((req->state == RCM_STATE_RESUMING) ||
		    (req->state == RCM_STATE_ONLINING));
		break;

	default:	/* shouldn't be here */
		rcm_log_message(RCM_ERROR,
		    gettext("invalid update to dr state: %d\n"), state);
		return (RCM_FAILURE);
	}

update:
	/*
	 * update the state, interval, and sequence number; sync state file
	 */
	req->state = state;
	req->seq_num = seq_num;

	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);
	return (RCM_SUCCESS);

failure:
	if (infop != NULL) {
		add_busy_rsrc_to_list(req->device, req->pid, req->state,
		    req->seq_num, NULL, locked_info, locked_err, NULL, infop);
	}

	/*
	 * A request may be left in a transition state because the operator
	 * typed ctrl-C. In this case, the daemon thread continues to run
	 * and will eventually put the state in a non-transitional state.
	 *
	 * To be safe, we return EAGAIN to allow librcm to loop and retry.
	 * If we are called from a module, loop & retry could result in a
	 * deadlock. The called will check for this case and turn EAGAIN
	 * into RCM_CONFLICT.
	 */
	if (transition_state(req->state)) {
		return (EAGAIN);
	}

	return (RCM_CONFLICT);
}
/*
 * Insert a dr entry in dr_req_list.
 *
 * First attempts to update an existing entry (the re-offline/re-suspend
 * case); only when no entry exists is a new one added, after checking
 * for lock conflicts.  The initiator pid is added to the polling list
 * so its death can be detected.
 *
 * Returns RCM_SUCCESS, RCM_CONFLICT, EAGAIN (retry), or EINVAL when
 * rsrcname cannot be resolved.
 */
int
dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    timespec_t *interval, rcm_info_t **info)
{
	int error;
	char *device;
	req_t *req;

	rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
	    rsrcname, pid, flag, state, seq_num, (void *)info);

	device = resolve_name(rsrcname);
	if (device == NULL)
		return (EINVAL);

	(void) mutex_lock(&rcm_req_lock);

	/*
	 * In the re-offline/suspend case, attempt to update dr request.
	 *
	 * If this succeeds, return success;
	 * If this fails because of a conflict, return error;
	 * If this this fails because no entry exists, add a new entry.
	 */
	error = dr_req_update_entry(device, pid, flag, state, seq_num, interval,
	    info);

	switch (error) {
	case RCM_FAILURE:
		/* proceed to add a new entry */
		break;

	case RCM_CONFLICT:
	case RCM_SUCCESS:
	case EAGAIN:
	default:
		goto out;
	}

	/*
	 * Check for lock conflicts
	 */
	error = check_lock(device, flag, LOCK_FOR_DR, info);
	if (error != RCM_SUCCESS) {
		error = RCM_CONFLICT;
		goto out;
	}

	/*
	 * Get empty request entry, fill in values and sync state file
	 */
	req = get_req_entry(&dr_req_list);

	req->seq_num = seq_num;
	req->pid = pid;
	req->flag = flag;
	req->state = state;
	req->type = rsrc_get_type(device);
	(void) strcpy(req->device, device);

	/* cache interval for failure recovery */
	if (interval)
		req->interval = *interval;
	else
		bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);

	/*
	 * Add initiator pid to polling list
	 */
	add_to_polling_list(req->pid);

out:
	(void) mutex_unlock(&rcm_req_lock);
	free(device);

	return (error);
}
751 * Update a dr entry in dr_req_list
753 /*ARGSUSED*/
755 dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
756 rcm_info_t **info)
758 int error;
759 char *device = resolve_name(rsrcname);
761 rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
762 rsrcname, pid, flag, state, seq_num);
764 (void) mutex_lock(&rcm_req_lock);
765 error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
766 info);
767 (void) mutex_unlock(&rcm_req_lock);
768 free(device);
770 return (error);
774 * This function scans the DR request list for the next, non-removed
775 * entry that is part of the specified sequence. The 'device' name
776 * of the entry is copied into the provided 'rsrc' buffer.
778 * The 'rsrc' buffer is required because the DR request list is only
779 * locked during the duration of this lookup. Giving a direct pointer
780 * to something in the list would be unsafe.
783 dr_req_lookup(int seq_num, char *rsrc)
785 int i;
786 int len;
787 int base = (seq_num >> SEQ_NUM_SHIFT);
788 int retval = RCM_FAILURE;
790 if (rsrc == NULL) {
791 return (RCM_FAILURE);
794 (void) mutex_lock(&rcm_req_lock);
796 for (i = 0; i < dr_req_list->n_req_max; i++) {
798 /* Skip removed or non-matching entries */
799 if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
800 ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) {
801 continue;
804 /* Copy the next-matching 'device' name into 'rsrc' */
805 len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
806 if (len < MAXPATHLEN) {
807 retval = RCM_SUCCESS;
809 break;
812 (void) mutex_unlock(&rcm_req_lock);
814 return (retval);
818 * Remove a dr entry in dr_req_list
820 void
821 dr_req_remove(char *rsrcname, uint_t flag)
823 req_t *req;
824 char *device = resolve_name(rsrcname);
826 rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);
828 (void) mutex_lock(&rcm_req_lock);
830 /* find entry */
831 req = find_req_entry(device, flag, -1, dr_req_list);
832 free(device);
834 if (req == NULL) {
835 (void) mutex_unlock(&rcm_req_lock);
836 rcm_log_message(RCM_WARNING,
837 gettext("dr_req entry %s not found\n"), rsrcname);
838 return;
841 req->state = RCM_STATE_REMOVE;
842 dr_req_list->n_req--;
843 (void) fsync(state_fd);
846 * remove pid from polling list
848 remove_from_polling_list(req->pid);
851 * We don't shrink the dr_req_list size for now.
852 * Shouldn't cause big memory leaks.
854 (void) mutex_unlock(&rcm_req_lock);
/*
 * Return the list of ongoing dr operation requests.
 *
 * Builds a linked list of rcm_info_t nodes, one per live dr request;
 * filesystem resources get a "(fs)" suffix on the name.  The caller
 * owns the returned list.  Any nvlist failure exits the daemon.
 */
rcm_info_t *
rsrc_dr_info()
{
	int i;
	rcm_info_t *info;
	rcm_info_t *result = NULL;
	char *rsrc;
	int len;

	rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");

	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		if (dr_req_list->req[i].flag & RCM_FILESYS) {
			/* +5 covers "(fs)" plus the terminating NUL */
			len = strlen(dr_req_list->req[i].device) + 5;
			rsrc = s_malloc(len);
			(void) snprintf(rsrc, len, "%s(fs)",
			    dr_req_list->req[i].device);
		} else {
			rsrc = s_strdup(dr_req_list->req[i].device);
		}

		/*
		 * The "errno = nvlist_*()" assignments below are
		 * intentional: the nvlist return code is stashed in
		 * errno so it can be logged and passed to rcmd_exit().
		 */
		info = s_calloc(1, sizeof (*info));
		if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_alloc=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}
		(void) free(rsrc);

		if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
		    dr_req_list->req[i].pid)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
		    dr_req_list->req[i].seq_num)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
		    dr_req_list->req[i].state)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
		    (char *)locked_info)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),
			    strerror(errno));
			rcmd_exit(errno);
		}

		/* prepend to the result list */
		info->next = result;
		result = info;
	}
	(void) mutex_unlock(&rcm_req_lock);

	return (result);
}
/*
 * Eliminate entries whose dr initiator is no longer running
 * and recover daemon state during daemon restart.
 *
 * This routine is called from either during daemon initialization
 * after all modules have registered resources or from the cleanup
 * thread. In either case, it is the only thread running in the
 * daemon.
 *
 * Recovery policy per entry: if the initiator process is still alive,
 * redo (re-drive) the interrupted operation; if it is dead, undo it
 * (bring the resource back online/resumed).
 */
void
clean_dr_list()
{
	int i;
	/* private snapshot of one stale request, decoupled from the list */
	struct clean_list {
		struct clean_list *next;
		char *rsrcname;
		pid_t pid;
		int seq_num;
		int state;
		timespec_t interval;
	} *tmp, *list = NULL;
	char *rsrcnames[2];

	rcm_log_message(RCM_TRACE3,
	    "clean_dr_list(): look for stale dr initiators\n");

	rsrcnames[1] = NULL;

	/*
	 * Make a list of entries to recover. This is necessary because
	 * the recovery operation will modify dr_req_list.
	 */
	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)
			continue;

		if (dr_req_list->req[i].device[0] == '\0')
			continue;

		/* skip cascade operations */
		if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)
			continue;

		/*
		 * In the cleanup case, ignore entries with initiators alive
		 */
		if ((rcmd_get_state() == RCMD_CLEANUP) &&
		    proc_exist(dr_req_list->req[i].pid))
			continue;

		rcm_log_message(RCM_TRACE1,
		    "found stale entry: %s\n", dr_req_list->req[i].device);

		tmp = s_malloc(sizeof (*tmp));
		tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
		tmp->state = dr_req_list->req[i].state;
		tmp->pid = dr_req_list->req[i].pid;
		tmp->seq_num = dr_req_list->req[i].seq_num;
		tmp->interval = dr_req_list->req[i].interval;
		tmp->next = list;
		list = tmp;
	}
	(void) mutex_unlock(&rcm_req_lock);

	if (list == NULL)
		return;

	/*
	 * If everything worked normally, we shouldn't be here.
	 * Since we are here, something went wrong, so say something.
	 */
	if (rcmd_get_state() == RCMD_INIT) {
		rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
		    "unexpectedly, recovering previous daemon state\n"));
	} else {
		rcm_log_message(RCM_INFO, gettext("one or more dr initiator "
		    "died, attempting automatic recovery\n"));
	}

	while (list) {
		tmp = list;
		list = tmp->next;

		switch (tmp->state) {
		case RCM_STATE_OFFLINE_QUERY:
		case RCM_STATE_OFFLINE_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINE:
		case RCM_STATE_OFFLINE_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_offline(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			} else {
				/* undo */
				(void) notify_resource_online(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND_QUERY:
		case RCM_STATE_SUSPEND_QUERY_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, RCM_QUERY, tmp->seq_num,
				    &tmp->interval, NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_SUSPEND:
		case RCM_STATE_SUSPEND_FAIL:
			rsrcnames[0] = tmp->rsrcname;
			if (proc_exist(tmp->pid)) {
				/* redo */
				(void) process_resource_suspend(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, &tmp->interval,
				    NULL);
			} else {
				/* undo */
				(void) notify_resource_resume(rsrcnames,
				    tmp->pid, 0, tmp->seq_num, NULL);
			}
			break;

		case RCM_STATE_OFFLINING:
		case RCM_STATE_ONLINING:
			/* interrupted transition: force back online */
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_online(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_SUSPENDING:
		case RCM_STATE_RESUMING:
			/* interrupted transition: force resume */
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_resume(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		case RCM_STATE_REMOVING:
			rsrcnames[0] = tmp->rsrcname;
			(void) notify_resource_remove(rsrcnames, tmp->pid, 0,
			    tmp->seq_num, NULL);
			break;

		default:
			rcm_log_message(RCM_WARNING,
			    gettext("%s in unknown state %d\n"),
			    tmp->rsrcname, tmp->state);
			break;
		}
		free(tmp->rsrcname);
		free(tmp);
	}
}
/*
 * Selected thread blocking based on event type
 */
barrier_t barrier;

/*
 * Change barrier state:
 *	RCMD_INIT - daemon is intializing, only register allowed
 *	RCMD_NORMAL - normal daemon processing
 *	RCMD_CLEANUP - cleanup thread is waiting or running
 */
int
rcmd_get_state()
{
	/* NOTE(review): read without barrier.lock — presumably tolerated
	 * as a racy snapshot; confirm against callers. */
	return (barrier.state);
}
/*
 * Move the daemon barrier to 'state'.
 *
 * Entering RCMD_CLEANUP blocks until all worker threads have exited
 * and then parks thr_count at -1 so no new workers start; leaving
 * cleanup restores thr_count and wakes any waiters.
 */
void
rcmd_set_state(int state)
{
	/*
	 * The state transition is as follows:
	 *	INIT --> NORMAL <---> CLEANUP
	 * The implementation favors the cleanup thread
	 */

	(void) mutex_lock(&barrier.lock);
	barrier.state = state;

	switch (state) {
	case RCMD_CLEANUP:
		/*
		 * Wait for existing threads to exit
		 */
		barrier.wanted++;
		while (barrier.thr_count != 0)
			(void) cond_wait(&barrier.cv, &barrier.lock);
		barrier.wanted--;
		/* -1 flags "cleanup owns the daemon" to rcmd_thr_incr */
		barrier.thr_count = -1;
		break;

	case RCMD_INIT:
	case RCMD_NORMAL:
	default:
		if (barrier.thr_count == -1)
			barrier.thr_count = 0;
		if (barrier.wanted)
			(void) cond_broadcast(&barrier.cv);
		break;
	}

	(void) mutex_unlock(&barrier.lock);
}
/*
 * Increment daemon thread count.
 *
 * Blocks until the barrier is in RCMD_NORMAL, then registers the
 * calling thread as an active worker.  Returns the sequence number
 * assigned to the operation, or -1 for commands that don't need one.
 */
int
rcmd_thr_incr(int cmd)
{
	int seq_num;

	(void) mutex_lock(&barrier.lock);
	/*
	 * Set wanted flag
	 */
	barrier.wanted++;

	/*
	 * Wait till it is safe for daemon to perform the operation
	 *
	 * NOTE: if a module registers by passing a request to the
	 *	client proccess, we may need to allow register
	 *	to come through during daemon initialization.
	 */
	while (barrier.state != RCMD_NORMAL)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	if ((cmd == CMD_EVENT) ||
	    (cmd == CMD_REGISTER) ||
	    (cmd == CMD_UNREGISTER)) {
		/*
		 * Event passthru and register ops don't need sequence number
		 */
		seq_num = -1;
	} else {
		/*
		 * Non register operation gets a sequence number
		 */
		seq_num = get_seq_number();
	}
	barrier.wanted--;
	barrier.thr_count++;
	(void) mutex_unlock(&barrier.lock);

	if ((cmd == CMD_OFFLINE) ||
	    (cmd == CMD_SUSPEND) ||
	    (cmd == CMD_GETINFO)) {
		/*
		 * For these operations, need to ask modules to
		 * register any new resources that came online.
		 *
		 * This is because mount/umount are not instrumented
		 * to register with rcm before using system resources.
		 * Certain registration ops may fail during sync, which
		 * indicates race conditions. This cannot be avoided
		 * without changing mount/umount.
		 */
		rcmd_db_sync();
	}

	return (seq_num);
}
1234 * Decrement thread count
1236 void
1237 rcmd_thr_decr()
1240 * Decrement thread count and wake up reload/cleanup thread.
1242 (void) mutex_lock(&barrier.lock);
1243 barrier.last_update = time(NULL);
1244 if (--barrier.thr_count == 0)
1245 (void) cond_broadcast(&barrier.cv);
1246 (void) mutex_unlock(&barrier.lock);
/*
 * Wakeup all waiting threads as a result of SIGHUP
 */
static int sighup_received = 0;	/* checked by rcmd_start_timer */

void
rcmd_thr_signal()
{
	(void) mutex_lock(&barrier.lock);
	sighup_received = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
}
/*
 * Run the daemon idle timer loop; returns (and the caller shuts the
 * daemon down) once the daemon has been idle for 'timeout' seconds.
 *
 *	timeout == 0 : use the 5 minute default
 *	timeout  < 0 : never time out (wait for SIGHUP)
 *	timeout  > 0 : use and persist the given value
 */
void
rcmd_start_timer(int timeout)
{
	timestruc_t abstime;

	if (timeout == 0)
		timeout = RCM_DAEMON_TIMEOUT;	/* default to 5 minutes */
	else
		dr_req_list->idle_timeout = timeout;	/* persist timeout */

	/* NOTE(review): abstime.tv_nsec is never initialized here —
	 * presumably harmless on this platform; confirm. */
	if (timeout > 0) {
		abstime.tv_sec = time(NULL) + timeout;
	}

	(void) mutex_lock(&barrier.lock);
	for (;;) {
		int idletime;
		int is_active;

		if (timeout > 0)
			(void) cond_timedwait(&barrier.cv, &barrier.lock,
			    &abstime);
		else
			(void) cond_wait(&barrier.cv, &barrier.lock);

		/*
		 * If sighup received, change timeout to 0 so the daemon is
		 * shut down at the first possible moment
		 */
		if (sighup_received)
			timeout = 0;

		/*
		 * If timeout is negative, never shutdown the daemon
		 */
		if (timeout < 0)
			continue;

		/*
		 * Check for ongoing/pending activity
		 */
		is_active = (barrier.thr_count || barrier.wanted ||
		    (dr_req_list->n_req != 0));
		if (is_active) {
			abstime.tv_sec = time(NULL) + timeout;
			continue;
		}

		/*
		 * If idletime is less than timeout, continue to wait
		 */
		idletime = time(NULL) - barrier.last_update;
		if (idletime < timeout) {
			abstime.tv_sec = barrier.last_update + timeout;
			continue;
		}
		break;
	}

	(void) script_main_fini();

	rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
}
/*
 * Code related to polling client pid's
 * Not declared as static so that we can find this structure easily
 * in the core file.
 *
 * pids[], refcnt[] and fds[] are parallel arrays indexed together;
 * refcnt counts how many dr requests reference each pid.
 */
struct {
	int n_pids;		/* entries currently in use */
	int n_max_pids;		/* allocated capacity of the arrays */
	thread_t poll_tid;	/* poll thread id */
	int signaled;
	pid_t *pids;		/* initiator pids being watched */
	int *refcnt;		/* dr request references per pid */
	struct pollfd *fds;	/* poll descriptors, one per pid */
	cond_t cv;	/* the associated lock is rcm_req_lock */
} polllist;
1343 static int
1344 find_pid_index(pid_t pid)
1346 int i;
1348 for (i = 0; i < polllist.n_pids; i++) {
1349 if (polllist.pids[i] == pid) {
1350 return (i);
1353 return (-1);
1357 * Resize buffer for new pids
1359 static int
1360 get_pid_index()
1362 const int n_chunk = 10;
1364 int n_max;
1365 int index = polllist.n_pids;
1367 if (polllist.n_pids < polllist.n_max_pids) {
1368 polllist.n_pids++;
1369 return (index);
1372 if (polllist.n_max_pids == 0) {
1373 n_max = n_chunk;
1374 polllist.pids = s_calloc(n_max, sizeof (pid_t));
1375 polllist.refcnt = s_calloc(n_max, sizeof (int));
1376 polllist.fds = s_calloc(n_max, sizeof (struct pollfd));
1377 } else {
1378 n_max = polllist.n_max_pids + n_chunk;
1379 polllist.pids = s_realloc(polllist.pids,
1380 n_max * sizeof (pid_t));
1381 polllist.refcnt = s_realloc(polllist.refcnt,
1382 n_max * sizeof (int));
1383 polllist.fds = s_realloc(polllist.fds,
1384 n_max * sizeof (struct pollfd));
1386 polllist.n_max_pids = n_max;
1387 polllist.n_pids++;
1388 return (index);
1392 * rcm_req_lock must be held
1394 static void
1395 add_to_polling_list(pid_t pid)
1397 int fd, index;
1398 char procfile[MAXPATHLEN];
1400 if (pid == (pid_t)0)
1401 return;
1403 rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);
1406 * Need to stop the poll thread before manipulating the polllist
1407 * since poll thread may possibly be using polllist.fds[] and
1408 * polllist.n_pids. As an optimization, first check if the pid
1409 * is already in the polllist. If it is, there is no need to
1410 * stop the poll thread. Just increment the pid reference count
1411 * and return;
1413 index = find_pid_index(pid);
1414 if (index != -1) {
1415 polllist.refcnt[index]++;
1416 return;
1419 stop_polling_thread();
1422 * In an attempt to stop the poll thread we may have released
1423 * and reacquired rcm_req_lock. So find the index again.
1425 index = find_pid_index(pid);
1426 if (index != -1) {
1427 polllist.refcnt[index]++;
1428 goto done;
1432 * Open a /proc file
1434 (void) sprintf(procfile, "/proc/%ld/as", pid);
1435 if ((fd = open(procfile, O_RDONLY)) == -1) {
1436 rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
1437 procfile, strerror(errno));
1438 goto done;
1442 * add pid to polllist
1444 index = get_pid_index();
1445 polllist.pids[index] = pid;
1446 polllist.refcnt[index] = 1;
1447 polllist.fds[index].fd = fd;
1448 polllist.fds[index].events = 0;
1449 polllist.fds[index].revents = 0;
1451 rcm_log_message(RCM_DEBUG, "add pid %ld at index %ld\n", pid, index);
1453 done:
1454 start_polling_thread();
1458 * rcm_req_lock must be held
1460 static void
1461 remove_from_polling_list(pid_t pid)
1463 int i, index;
1465 if (pid == (pid_t)0)
1466 return;
1468 rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);
1471 * Need to stop the poll thread before manipulating the polllist
1472 * since poll thread may possibly be using polllist.fds[] and
1473 * polllist.n_pids. As an optimization, first check the pid
1474 * reference count. If the pid reference count is greater than 1
1475 * there is no need to stop the polling thread.
1478 index = find_pid_index(pid);
1479 if (index == -1) {
1480 rcm_log_message(RCM_NOTICE,
1481 gettext("error removing pid %ld from polling list\n"), pid);
1482 return;
1486 * decrement the pid refcnt
1488 if (polllist.refcnt[index] > 1) {
1489 polllist.refcnt[index]--;
1490 return;
1493 stop_polling_thread();
1496 * In an attempt to stop the poll thread we may have released
1497 * and reacquired rcm_req_lock. So find the index again.
1499 index = find_pid_index(pid);
1500 if (index == -1) {
1501 rcm_log_message(RCM_NOTICE,
1502 gettext("error removing pid %ld from polling list\n"), pid);
1503 goto done;
1506 if (--polllist.refcnt[index] > 0)
1507 goto done;
1510 * refcnt down to zero, delete pid from polling list
1512 (void) close(polllist.fds[index].fd);
1513 polllist.n_pids--;
1515 for (i = index; i < polllist.n_pids; i++) {
1516 polllist.pids[i] = polllist.pids[i + 1];
1517 polllist.refcnt[i] = polllist.refcnt[i + 1];
1518 bcopy(&polllist.fds[i + 1], &polllist.fds[i],
1519 sizeof (struct pollfd));
1522 rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index);
1524 done:
1525 start_polling_thread();
1528 void
1529 init_poll_thread()
1531 polllist.poll_tid = (thread_t)-1;
1534 void
1535 cleanup_poll_thread()
1537 (void) mutex_lock(&rcm_req_lock);
1538 if (polllist.poll_tid == thr_self()) {
1539 rcm_log_message(RCM_TRACE2,
1540 "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
1541 polllist.poll_tid = (thread_t)-1;
1542 (void) cond_broadcast(&polllist.cv);
1544 (void) mutex_unlock(&rcm_req_lock);
1547 /*ARGSUSED*/
1548 static void *
1549 pollfunc(void *arg)
1551 sigset_t mask;
1553 rcm_log_message(RCM_TRACE2, "poll thread started. n_pids = %d\n",
1554 polllist.n_pids);
1557 * Unblock SIGUSR1 to allow polling thread to be killed
1559 (void) sigemptyset(&mask);
1560 (void) sigaddset(&mask, SIGUSR1);
1561 (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
1563 (void) poll(polllist.fds, polllist.n_pids, (time_t)-1);
1566 * block SIGUSR1 to avoid being killed while holding a lock
1568 (void) sigemptyset(&mask);
1569 (void) sigaddset(&mask, SIGUSR1);
1570 (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
1572 rcm_log_message(RCM_TRACE2, "returned from poll()\n");
1574 cleanup_poll_thread();
1576 (void) mutex_lock(&barrier.lock);
1577 need_cleanup = 1;
1578 (void) cond_broadcast(&barrier.cv);
1579 (void) mutex_unlock(&barrier.lock);
1581 return (NULL);
1585 * rcm_req_lock must be held
1587 void
1588 start_polling_thread()
1590 int err;
1592 if (rcmd_get_state() != RCMD_NORMAL)
1593 return;
1595 if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0)
1596 return;
1598 if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED,
1599 &polllist.poll_tid)) == 0)
1600 polllist.signaled = 0;
1601 else
1602 rcm_log_message(RCM_ERROR,
1603 gettext("failed to create polling thread: %s\n"),
1604 strerror(err));
1608 * rcm_req_lock must be held
1610 static void
1611 stop_polling_thread()
1613 int err;
1615 while (polllist.poll_tid != (thread_t)-1) {
1616 if (polllist.signaled == 0) {
1617 if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0)
1618 polllist.signaled = 1;
1619 else
1621 * thr_kill shouldn't have failed since the
1622 * poll thread id and the signal are valid.
1623 * So log an error. Since when thr_kill
1624 * fails no signal is sent (as per man page),
1625 * the cond_wait below will wait until the
1626 * the poll thread exits by some other means.
1627 * The poll thread, for example, exits on its
1628 * own when any DR initiator process that it
1629 * is currently polling exits.
1631 rcm_log_message(RCM_ERROR,
1632 gettext(
1633 "fail to kill polling thread %d: %s\n"),
1634 polllist.poll_tid, strerror(err));
1636 (void) cond_wait(&polllist.cv, &rcm_req_lock);