4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include "rcm_module.h"
/*
 * Global lock protecting both request lists below (dr_req_list and
 * info_req_list) as well as the polling list.
 */
mutex_t rcm_req_lock;	/* protects global dr & info request list */

/* file backing dr_req_list so DR state survives daemon restarts */
#define RCM_STATE_FILE "/var/run/rcm_daemon_state"
#define N_REQ_CHUNK 10 /* grow 10 entries at a time */

/*
 * Daemon timeout value
 */
#define RCM_DAEMON_TIMEOUT 300 /* 5 minutes idle time */

/*
 * Struct for a list of outstanding rcm requests
 *
 * NOTE(review): the enclosing typedef/struct declarations (req_t and
 * req_list_t) are not fully visible in this excerpt; only the members
 * survive below.
 */
int seq_num;		/* sequence number of request */
int state;		/* current state */
pid_t pid;		/* pid of initiator */
uint_t flag;		/* request flags */
int type;		/* resource(device) type */
timespec_t interval;	/* suspend interval */
char device[MAXPATHLEN];	/* name of device or resource */

/* NOTE(review): header members of the request-list struct (req_list_t) */
int n_req_max;		/* number of req_t's to follow */
int n_seq_max;		/* last sequence number */
int idle_timeout;	/* persist idle timeout value */
/* more req_t follows */

/* DR request list (mmap'ed to state file) and in-memory info list */
static req_list_t *dr_req_list;
static req_list_t *info_req_list;

/* canned strings reported for resources locked by an ongoing DR */
static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";

static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);
void start_polling_thread();
static void stop_polling_thread();
/*
 * Initialize request lists required for locking
 *
 * Allocates the (in-memory) info request list and opens + mmaps the
 * daemon state file so the DR request list persists across restarts.
 *
 * NOTE(review): the function signature, local declarations, and several
 * error-exit lines are not visible in this excerpt.
 */
	/*
	 * Start info list with one slot, then grow on demand.
	 */
	info_req_list = s_calloc(1, sizeof (req_list_t));
	info_req_list->n_req_max = 1;

	/*
	 * Open daemon state file and map in contents
	 */
	state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600);
	if (state_fd == -1) {
		rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));

	if (fstat(state_fd, &fbuf) != 0) {
		rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));

	/* size the file for at least one entry before mapping it */
	size = sizeof (req_list_t);
	if (ftruncate(state_fd, size) != 0) {
		rcm_log_message(RCM_ERROR,
		    gettext("cannot truncate %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));

	/* MAP_SHARED so in-memory updates reach the state file */
	dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE,
	    MAP_SHARED, state_fd, 0);
	if (dr_req_list == MAP_FAILED) {
		rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"),
		    RCM_STATE_FILE, strerror(errno));

	/*
	 * Initial size is one entry
	 */
	if (dr_req_list->n_req_max == 0) {
		dr_req_list->n_req_max = 1;
		(void) fsync(state_fd);

	rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n",
	    dr_req_list->n_req, dr_req_list->n_req_max);

	/*
	 * Recover the daemon state
	 */
/*
 * Get a unique sequence number--to be called with rcm_req_lock held.
 *
 * The base counter lives in the persisted dr_req_list header; the value
 * handed out is shifted by SEQ_NUM_SHIFT so the low bits are free to
 * number cascade sub-operations.
 *
 * NOTE(review): the function signature, locals, and return statement
 * are not visible in this excerpt.
 */
	if (dr_req_list == NULL)

	dr_req_list->n_seq_max++;
	number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT);
	/* persist the new high-water mark in the state file */
	(void) fsync(state_fd);
/*
 * Find entry in list with the same resource name and sequence number.
 * If seq_num == -1, no seq_num matching is required.
 *
 * Returns a pointer into the list on a match; NOTE(review): the
 * not-found return path is not visible in this excerpt.
 */
find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list)
	/*
	 * Look for entry with the same resource and seq_num.
	 * Also match RCM_FILESYS field in flag.
	 */
	for (i = 0; i < list->n_req_max; i++) {
		/* skip recycled (deleted) slots */
		if (list->req[i].state == RCM_STATE_REMOVE)

		/*
		 * We need to distiguish a file system root from the directory
		 * Applications are not aware of any difference between the
		 * two, but the system keeps track of it internally by
		 * checking for mount points while traversing file path.
		 * In a similar spirit, RCM is keeping this difference as
		 * an implementation detail.
		 */
		if ((strcmp(device, list->req[i].device) != 0) ||
		    (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS))
			/* different resource */

		/* compare only the base (shifted) part of the seq number */
		if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) !=
		    (list->req[i].seq_num >> SEQ_NUM_SHIFT)))
			/* different base seqnum */

		/* match */
		return (&list->req[i]);
/*
 * Get the next empty req_t entry. If no entry exists, grow the list.
 *
 * info_req_list grows via realloc; the persisted dr_req_list grows by
 * extending the state file (ftruncate) and re-mmapping it.
 *
 * NOTE(review): several lines (error exits, closing braces) are not
 * visible in this excerpt.
 */
get_req_entry(req_list_t **listp)
	int n_req = (*listp)->n_req;
	int n_req_max = (*listp)->n_req_max;

	/*
	 * If the list is full, grow the list and return the first
	 * entry in the new portion.
	 */
	if (n_req == n_req_max) {
		n_req_max += N_REQ_CHUNK;
		/* NOTE(review): trailing term of this expression not visible */
		newsize = sizeof (req_list_t) + (n_req_max - 1) *
		if (listp == &info_req_list) {
			*listp = s_realloc(*listp, newsize);
		} else if (ftruncate(state_fd, newsize) != 0) {
			rcm_log_message(RCM_ERROR,
			    gettext("cannot truncate %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));
		} else if ((*listp = (req_list_t *)mmap(NULL, newsize,
		    PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) ==
			rcm_log_message(RCM_ERROR,
			    gettext("cannot mmap %s: %s\n"),
			    RCM_STATE_FILE, strerror(errno));

		/* Initialize the new entries */
		for (i = (*listp)->n_req_max; i < n_req_max; i++) {
			(*listp)->req[i].state = RCM_STATE_REMOVE;
			(void) strcpy((*listp)->req[i].device, "");

		(*listp)->n_req_max = n_req_max;

		return (&(*listp)->req[n_req]);

	/*
	 * List contains empty slots, find it.
	 */
	for (i = 0; i < n_req_max; i++) {
		if (((*listp)->req[i].device[0] == '\0') ||
		    ((*listp)->req[i].state == RCM_STATE_REMOVE)) {

	assert(i < n_req_max);	/* empty slot must exist */

	return (&(*listp)->req[i]);
/*
 * When one resource depends on multiple resources, it's possible that
 * rcm_get_info can be called multiple times on the resource, resulting
 * in duplicate information. By assigning a unique sequence number to
 * each rcm_get_info operation, this duplication can be eliminated.
 *
 * Insert a dr entry in info_req_list
 *
 * NOTE(review): return statements and some locals are not visible in
 * this excerpt.
 */
info_req_add(char *rsrcname, uint_t flag, int seq_num)
	rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n",

	device = resolve_name(rsrcname);
	(void) mutex_lock(&rcm_req_lock);

	/*
	 * Look for entry with the same resource and seq_num.
	 * If it exists, we return an error so that such
	 * information is not gathered more than once.
	 */
	if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) {
		rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d \n",

	/*
	 * Get empty entry and fill in seq_num and device.
	 */
	req = get_req_entry(&info_req_list);
	req->seq_num = seq_num;
	req->state = RCM_STATE_ONLINE;	/* mark that the entry is in use */
	(void) strcpy(req->device, device);

	(void) mutex_unlock(&rcm_req_lock);
/*
 * Remove all entries associated with seq_num from info_req_list
 *
 * Entries are recycled by marking them RCM_STATE_REMOVE; the array
 * itself is never shrunk (see comment below).
 */
info_req_remove(int seq_num)
	rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num);

	/* compare on the base sequence number only */
	seq_num >>= SEQ_NUM_SHIFT;
	(void) mutex_lock(&rcm_req_lock);

	/* remove all entries with seq_num */
	for (i = 0; i < info_req_list->n_req_max; i++) {
		if (info_req_list->req[i].state == RCM_STATE_REMOVE)

		if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num)

		info_req_list->req[i].state = RCM_STATE_REMOVE;
		info_req_list->n_req--;

	/*
	 * We don't shrink the info_req_list size for now.
	 */
	(void) mutex_unlock(&rcm_req_lock);
/*
 * Checking lock conflicts. There is a conflict if:
 * - attempt to DR a node when either its ancester or descendent
 *   is in the process of DR
 * - attempt to register for a node when its ancester is locked for DR
 *
 * Conflicting entries are reported through 'info' via
 * add_busy_rsrc_to_list(). NOTE(review): 'continue'/'return' lines and
 * closing braces are not visible in this excerpt.
 */
check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info)
	int i, ret = RCM_SUCCESS;

	/*
	 * During daemon initialization, don't check locks
	 */
	if (dr_req_list == NULL)

	for (i = 0; i < dr_req_list->n_req; i++) {
		req_t *req = &dr_req_list->req[i];
		char *dr_dev = req->device;

		/* skip recycled or empty entries */
		if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0'))

		/*
		 * Make sure that none of the ancestors of dr_dev is
		 * being operated upon.
		 */
		if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) {
			/*
			 * An exception to this is the filesystem.
			 * We should allowed a filesystem rooted at a
			 * child directory to be unmounted.
			 */
			if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) ||
			    ((dr_req_list->req[i].flag & RCM_FILESYS) == 0)))

			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);

		if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) {
			/*
			 * Check descendents only for DR request.
			 *
			 * Could have multiple descendents doing DR,
			 * we want to find them all.
			 */
			add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid,
			    dr_req_list->req[i].state,
			    dr_req_list->req[i].seq_num, NULL, locked_info,
			    locked_err, NULL, info);
			/* don't break here, need to find all conflicts */
/*
 * Check for lock conflicts for DR operation or client registration
 *
 * Thin wrapper: resolves the resource name, then delegates to
 * check_lock(). NOTE(review): the rest of the signature and the return
 * are not visible in this excerpt.
 */
rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag,
	device = resolve_name(rsrcname);
	result = check_lock(device, flag, cflag, info);
/*
 * Report whether 'state' is one of the transitional DR states, i.e. a
 * daemon thread is still moving the resource between settled states.
 *
 * NOTE(review): the switch statement, return values, and closing brace
 * are not visible in this excerpt.
 */
transition_state(int state)
	/*
	 * If the resource state is in transition, ask caller to
	 * (NOTE(review): remainder of this comment not visible)
	 */
	case RCM_STATE_OFFLINING:
	case RCM_STATE_SUSPENDING:
	case RCM_STATE_RESUMING:
	case RCM_STATE_ONLINING:
	case RCM_STATE_REMOVING:
/*
 * Update a dr entry in dr_req_list
 *
 * Looks up the entry for 'device', validates that moving to 'state' is
 * a legal transition from the entry's current state, then updates
 * state/interval/seq_num and fsyncs the persisted state file. On a
 * busy conflict the entry is reported through 'infop'.
 *
 * NOTE(review): break statements, some returns, and closing braces are
 * not visible in this excerpt.
 */
dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state,
    int seq_num, timespec_t *interval, rcm_info_t **infop)
	/*
	 * Find request entry. If not found, return RCM_FAILURE
	 */
	req = find_req_entry(device, flag, -1, dr_req_list);

	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_OFFLINING:
	case RCM_STATE_SUSPENDING:
		/* could be re-do operation, no error message */

		rcm_log_message(RCM_DEBUG,
		    "update non-existing resource %s\n", device);
		return (RCM_FAILURE);

	/*
	 * During initialization, update is unconditional (forced)
	 * in order to bring the daemon up in a sane state.
	 */
	if (rcmd_get_state() == RCMD_INIT)

	/*
	 * Don't allow update with mismatched initiator pid. This could happen
	 * as part of normal operation.
	 */
	if (pid != req->pid) {
		rcm_log_message(RCM_INFO,
		    gettext("mismatched dr initiator pid: %ld %ld\n"),

	rcm_log_message(RCM_TRACE4,
	    "dr_req_update_entry: state=%d, device=%s\n",
	    req->state, req->device);

	/*
	 * Check that the state transition is valid
	 */
	case RCM_STATE_OFFLINE_QUERYING:
	case RCM_STATE_OFFLINING:
		/*
		 * This is the case of re-offlining, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_OFFLINE_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid offlining from state %d\n"),

	case RCM_STATE_SUSPEND_QUERYING:
	case RCM_STATE_SUSPENDING:
		/*
		 * This is the case of re-suspending, which applies only
		 * if a previous attempt failed.
		 */
		if ((req->state != RCM_STATE_SUSPEND_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND)) {
			rcm_log_message(RCM_WARNING,
			    gettext("%s: invalid suspending from state %d\n"),

	case RCM_STATE_RESUMING:
		/* resuming is only valid from a suspend-related state */
		if ((req->state != RCM_STATE_SUSPEND) &&
		    (req->state != RCM_STATE_SUSPEND_QUERYING) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY) &&
		    (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) &&
		    (req->state != RCM_STATE_SUSPEND_FAIL)) {
			rcm_log_message(RCM_DEBUG,
			    "%s: invalid resuming from state %d\n",

	case RCM_STATE_ONLINING:
		/* onlining is only valid from an offline-related state */
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_QUERYING) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY) &&
		    (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid onlining from state %d\n"),

	case RCM_STATE_REMOVING:
		if ((req->state != RCM_STATE_OFFLINE) &&
		    (req->state != RCM_STATE_OFFLINE_FAIL)) {
			rcm_log_message(RCM_INFO,
			    gettext("%s: invalid removing from state %d\n"),

	case RCM_STATE_SUSPEND_FAIL:
		assert(req->state == RCM_STATE_SUSPENDING);

	case RCM_STATE_OFFLINE_FAIL:
		assert(req->state == RCM_STATE_OFFLINING);

	case RCM_STATE_SUSPEND:
		assert(req->state == RCM_STATE_SUSPENDING);

	case RCM_STATE_OFFLINE:
		assert(req->state == RCM_STATE_OFFLINING);

	case RCM_STATE_ONLINE:
		assert((req->state == RCM_STATE_RESUMING) ||
		    (req->state == RCM_STATE_ONLINING));

	default:	/* shouldn't be here */
		rcm_log_message(RCM_ERROR,
		    gettext("invalid update to dr state: %d\n"), state);
		return (RCM_FAILURE);

	/*
	 * update the state, interval, and sequence number; sync state file
	 */
	req->seq_num = seq_num;

	req->interval = *interval;

	/* no interval supplied: clear the cached one */
	bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);
	return (RCM_SUCCESS);

	/* conflict path: report the busy resource to the caller */
	add_busy_rsrc_to_list(req->device, req->pid, req->state,
	    req->seq_num, NULL, locked_info, locked_err, NULL, infop);

	/*
	 * A request may be left in a transition state because the operator
	 * typed ctrl-C. In this case, the daemon thread continues to run
	 * and will eventually put the state in a non-transitional state.
	 *
	 * To be safe, we return EAGAIN to allow librcm to loop and retry.
	 * If we are called from a module, loop & retry could result in a
	 * deadlock. The called will check for this case and turn EAGAIN
	 * (NOTE(review): remainder of this comment not visible)
	 */
	if (transition_state(req->state)) {

	return (RCM_CONFLICT);
/*
 * Insert a dr entry in dr_req_list
 *
 * First tries to update an existing entry (re-offline/re-suspend);
 * otherwise checks lock conflicts, fills a fresh entry, persists it,
 * and starts polling the initiator pid for unexpected death.
 *
 * NOTE(review): error-exit lines, some assignments, and closing braces
 * are not visible in this excerpt.
 */
dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
    timespec_t *interval, rcm_info_t **info)
	rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n",
	    rsrcname, pid, flag, state, seq_num, (void *)info);

	device = resolve_name(rsrcname);

	(void) mutex_lock(&rcm_req_lock);

	/*
	 * In the re-offline/suspend case, attempt to update dr request.
	 *
	 * If this succeeds, return success;
	 * If this fails because of a conflict, return error;
	 * If this this fails because no entry exists, add a new entry.
	 */
	error = dr_req_update_entry(device, pid, flag, state, seq_num, interval,

	/* proceed to add a new entry */

	/*
	 * Check for lock conflicts
	 */
	error = check_lock(device, flag, LOCK_FOR_DR, info);
	if (error != RCM_SUCCESS) {
		error = RCM_CONFLICT;

	/*
	 * Get empty request entry, fill in values and sync state file
	 */
	req = get_req_entry(&dr_req_list);

	req->seq_num = seq_num;
	req->type = rsrc_get_type(device);
	(void) strcpy(req->device, device);

	/* cache interval for failure recovery */
	req->interval = *interval;

	/* no interval supplied: clear the cached one */
	bzero(&req->interval, sizeof (timespec_t));

	(void) fsync(state_fd);

	/*
	 * Add initiator pid to polling list
	 */
	add_to_polling_list(req->pid);

	(void) mutex_unlock(&rcm_req_lock);
/*
 * Update a dr entry in dr_req_list
 *
 * Public wrapper around dr_req_update_entry(): resolves the name and
 * performs the update under rcm_req_lock (no interval supplied).
 *
 * NOTE(review): the rest of the signature, the trailing arguments of
 * the update call, and the return are not visible in this excerpt.
 */
dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num,
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n",
	    rsrcname, pid, flag, state, seq_num);

	(void) mutex_lock(&rcm_req_lock);
	error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL,
	(void) mutex_unlock(&rcm_req_lock);
/*
 * This function scans the DR request list for the next, non-removed
 * entry that is part of the specified sequence. The 'device' name
 * of the entry is copied into the provided 'rsrc' buffer.
 *
 * The 'rsrc' buffer is required because the DR request list is only
 * locked during the duration of this lookup. Giving a direct pointer
 * to something in the list would be unsafe.
 *
 * Returns RCM_SUCCESS when a matching entry was copied without
 * truncation, RCM_FAILURE otherwise.
 */
dr_req_lookup(int seq_num, char *rsrc)
	int base = (seq_num >> SEQ_NUM_SHIFT);
	int retval = RCM_FAILURE;

	/* NOTE(review): the guard for this early failure is not visible */
	return (RCM_FAILURE);

	(void) mutex_lock(&rcm_req_lock);

	for (i = 0; i < dr_req_list->n_req_max; i++) {

		/* Skip removed or non-matching entries */
		if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) ||
		    ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) {

		/* Copy the next-matching 'device' name into 'rsrc' */
		len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN);
		if (len < MAXPATHLEN) {
			retval = RCM_SUCCESS;

	(void) mutex_unlock(&rcm_req_lock);
/*
 * Remove a dr entry in dr_req_list
 *
 * Recycles the entry (RCM_STATE_REMOVE), persists the change, and
 * drops the initiator pid from the polling list.
 */
dr_req_remove(char *rsrcname, uint_t flag)
	char *device = resolve_name(rsrcname);

	rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname);

	(void) mutex_lock(&rcm_req_lock);

	req = find_req_entry(device, flag, -1, dr_req_list);

		/* not found: warn and bail out */
		(void) mutex_unlock(&rcm_req_lock);
		rcm_log_message(RCM_WARNING,
		    gettext("dr_req entry %s not found\n"), rsrcname);

	req->state = RCM_STATE_REMOVE;
	dr_req_list->n_req--;
	(void) fsync(state_fd);

	/*
	 * remove pid from polling list
	 */
	remove_from_polling_list(req->pid);

	/*
	 * We don't shrink the dr_req_list size for now.
	 * Shouldn't cause big memory leaks.
	 */
	(void) mutex_unlock(&rcm_req_lock);
/*
 * Return the list of ongoing dr operation requests
 *
 * Builds one nvlist-backed rcm_info_t per live entry in dr_req_list,
 * recording resource name, initiator pid, sequence number, state, and
 * the canned "DR operation in progress" info string.
 *
 * NOTE(review): the function signature, 'continue' lines, error exits,
 * and the linking of 'info' onto 'result' are not visible here.
 */
	rcm_info_t *result = NULL;

	rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n");

	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip recycled and empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)

		if (dr_req_list->req[i].device[0] == '\0')

		/* filesystem resources are reported with an "(fs)" suffix */
		if (dr_req_list->req[i].flag & RCM_FILESYS) {
			len = strlen(dr_req_list->req[i].device) + 5;
			rsrc = s_malloc(len);
			(void) snprintf(rsrc, len, "%s(fs)",
			    dr_req_list->req[i].device);

			rsrc = s_strdup(dr_req_list->req[i].device);

		info = s_calloc(1, sizeof (*info));
		/* assignment-in-if is intentional: capture nvlist errno */
		if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_alloc=%s).\n"),

		if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),

		if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID,
		    dr_req_list->req[i].pid)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),

		if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM,
		    dr_req_list->req[i].seq_num)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),

		if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE,
		    dr_req_list->req[i].state)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),

		if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO,
		    (char *)locked_info)) {
			rcm_log_message(RCM_ERROR,
			    gettext("failed (nvlist_add=%s).\n"),

	(void) mutex_unlock(&rcm_req_lock);
/*
 * Eliminate entries whose dr initiator is no longer running
 * and recover daemon state during daemon restart.
 *
 * This routine is called from either during daemon initialization
 * after all modules have registered resources or from the cleanup
 * thread. In either case, it is the only thread running in the
 * (NOTE(review): remainder of this comment not visible)
 *
 * Two phases: (1) under rcm_req_lock, snapshot stale entries onto a
 * private list; (2) without the lock, re-drive or undo each operation
 * depending on its recorded state and whether the initiator is alive.
 *
 * NOTE(review): struct declaration lines, list linking, loop framing,
 * and break statements are not visible in this excerpt.
 */
	struct clean_list *next;
	} *tmp, *list = NULL;

	rcm_log_message(RCM_TRACE3,
	    "clean_dr_list(): look for stale dr initiators\n");

	/*
	 * Make a list of entries to recover. This is necessary because
	 * the recovery operation will modify dr_req_list.
	 */
	(void) mutex_lock(&rcm_req_lock);
	for (i = 0; i < dr_req_list->n_req_max; i++) {
		/* skip empty entries */
		if (dr_req_list->req[i].state == RCM_STATE_REMOVE)

		if (dr_req_list->req[i].device[0] == '\0')

		/* skip cascade operations */
		if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK)

		/*
		 * In the cleanup case, ignore entries with initiators alive
		 */
		if ((rcmd_get_state() == RCMD_CLEANUP) &&
		    proc_exist(dr_req_list->req[i].pid))

		rcm_log_message(RCM_TRACE1,
		    "found stale entry: %s\n", dr_req_list->req[i].device);

		/* snapshot the entry onto the private recovery list */
		tmp = s_malloc(sizeof (*tmp));
		tmp->rsrcname = s_strdup(dr_req_list->req[i].device);
		tmp->state = dr_req_list->req[i].state;
		tmp->pid = dr_req_list->req[i].pid;
		tmp->seq_num = dr_req_list->req[i].seq_num;
		tmp->interval = dr_req_list->req[i].interval;

	(void) mutex_unlock(&rcm_req_lock);

	/*
	 * If everything worked normally, we shouldn't be here.
	 * Since we are here, something went wrong, so say something.
	 */
	if (rcmd_get_state() == RCMD_INIT) {
		rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died "
		    "unexpectedly, recovering previous daemon state\n"));

		rcm_log_message(RCM_INFO, gettext("one or more dr initiator "
		    "died, attempting automatic recovery\n"));

	switch (tmp->state) {
	case RCM_STATE_OFFLINE_QUERY:
	case RCM_STATE_OFFLINE_QUERY_FAIL:
		rsrcnames[0] = tmp->rsrcname;
		/* initiator alive: re-drive the query; dead: undo it */
		if (proc_exist(tmp->pid)) {
			(void) process_resource_offline(rsrcnames,
			    tmp->pid, RCM_QUERY, tmp->seq_num, NULL);

			(void) notify_resource_online(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, NULL);

	case RCM_STATE_OFFLINE:
	case RCM_STATE_OFFLINE_FAIL:
		rsrcnames[0] = tmp->rsrcname;
		if (proc_exist(tmp->pid)) {
			(void) process_resource_offline(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, NULL);

			(void) notify_resource_online(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, NULL);

	case RCM_STATE_SUSPEND_QUERY:
	case RCM_STATE_SUSPEND_QUERY_FAIL:
		rsrcnames[0] = tmp->rsrcname;
		if (proc_exist(tmp->pid)) {
			(void) process_resource_suspend(rsrcnames,
			    tmp->pid, RCM_QUERY, tmp->seq_num,
			    &tmp->interval, NULL);

			(void) notify_resource_resume(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, NULL);

	case RCM_STATE_SUSPEND:
	case RCM_STATE_SUSPEND_FAIL:
		rsrcnames[0] = tmp->rsrcname;
		if (proc_exist(tmp->pid)) {
			(void) process_resource_suspend(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, &tmp->interval,

			(void) notify_resource_resume(rsrcnames,
			    tmp->pid, 0, tmp->seq_num, NULL);

	case RCM_STATE_OFFLINING:
	case RCM_STATE_ONLINING:
		/* interrupted transition: push the resource back online */
		rsrcnames[0] = tmp->rsrcname;
		(void) notify_resource_online(rsrcnames, tmp->pid, 0,
		    tmp->seq_num, NULL);

	case RCM_STATE_SUSPENDING:
	case RCM_STATE_RESUMING:
		rsrcnames[0] = tmp->rsrcname;
		(void) notify_resource_resume(rsrcnames, tmp->pid, 0,
		    tmp->seq_num, NULL);

	case RCM_STATE_REMOVING:
		rsrcnames[0] = tmp->rsrcname;
		(void) notify_resource_remove(rsrcnames, tmp->pid, 0,
		    tmp->seq_num, NULL);

		rcm_log_message(RCM_WARNING,
		    gettext("%s in unknown state %d\n"),
		    tmp->rsrcname, tmp->state);

	free(tmp->rsrcname);
/*
 * Selected thread blocking based on event type
 *
 * Change barrier state:
 *	RCMD_INIT - daemon is intializing, only register allowed
 *	RCMD_NORMAL - normal daemon processing
 *	RCMD_CLEANUP - cleanup thread is waiting or running
 *
 * NOTE(review): the accessor's signature is not visible; only its
 * return of the current barrier state survives below.
 */
	return (barrier.state);
/*
 * Move the barrier to a new state, draining worker threads first.
 *
 * NOTE(review): some branch framing and closing braces are not visible
 * in this excerpt.
 */
rcmd_set_state(int state)
	/*
	 * The state transition is as follows:
	 *	INIT --> NORMAL <---> CLEANUP
	 * The implementation favors the cleanup thread
	 */
	(void) mutex_lock(&barrier.lock);
	barrier.state = state;

	/*
	 * Wait for existing threads to exit
	 */
	while (barrier.thr_count != 0)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	/* -1 marks "cleanup in progress": blocks new worker threads */
	barrier.thr_count = -1;

	if (barrier.thr_count == -1)
		barrier.thr_count = 0;

	/* wake threads waiting for the barrier state to change */
	(void) cond_broadcast(&barrier.cv);

	(void) mutex_unlock(&barrier.lock);
/*
 * Increment daemon thread count
 *
 * Blocks until the barrier allows normal processing, hands out a
 * sequence number for non-register commands, and bumps the count of
 * active worker threads.
 *
 * NOTE(review): the return and the registration-sync call at the end
 * are not visible in this excerpt.
 */
rcmd_thr_incr(int cmd)
	(void) mutex_lock(&barrier.lock);

	/*
	 * Wait till it is safe for daemon to perform the operation
	 *
	 * NOTE: if a module registers by passing a request to the
	 *	client proccess, we may need to allow register
	 *	to come through during daemon initialization.
	 */
	while (barrier.state != RCMD_NORMAL)
		(void) cond_wait(&barrier.cv, &barrier.lock);

	if ((cmd == CMD_EVENT) ||
	    (cmd == CMD_REGISTER) ||
	    (cmd == CMD_UNREGISTER)) {
		/*
		 * Event passthru and register ops don't need sequence number
		 */

		/*
		 * Non register operation gets a sequence number
		 */
		seq_num = get_seq_number();

	barrier.thr_count++;
	(void) mutex_unlock(&barrier.lock);

	if ((cmd == CMD_OFFLINE) ||
	    (cmd == CMD_SUSPEND) ||
	    (cmd == CMD_GETINFO)) {
		/*
		 * For these operations, need to ask modules to
		 * register any new resources that came online.
		 *
		 * This is because mount/umount are not instrumented
		 * to register with rcm before using system resources.
		 * Certain registration ops may fail during sync, which
		 * indicates race conditions. This cannot be avoided
		 * without changing mount/umount.
		 */
/*
 * Decrement thread count
 *
 * Also refreshes the idle timestamp used by the daemon idle timer.
 * NOTE(review): the function signature is not visible in this excerpt.
 */
	/*
	 * Decrement thread count and wake up reload/cleanup thread.
	 */
	(void) mutex_lock(&barrier.lock);
	barrier.last_update = time(NULL);
	if (--barrier.thr_count == 0)
		(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
/*
 * Wakeup all waiting threads as a result of SIGHUP
 *
 * Sets the sticky flag checked by the idle timer (rcmd_start_timer) so
 * the daemon shuts down at the first safe opportunity.
 * NOTE(review): the function signature is not visible in this excerpt.
 */
static int sighup_received = 0;

	(void) mutex_lock(&barrier.lock);
	sighup_received = 1;
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
/*
 * Daemon idle timer: waits on barrier.cv and shuts the daemon down
 * once it has been idle for 'timeout' seconds.
 *
 * NOTE(review): the loop framing, several guards, and shutdown calls
 * are not visible in this excerpt; only surviving statements shown.
 */
rcmd_start_timer(int timeout)
	timestruc_t abstime;

	/* NOTE(review): the guard selecting the default is not visible */
	timeout = RCM_DAEMON_TIMEOUT;	/* default to 5 minutes */

	dr_req_list->idle_timeout = timeout;	/* persist timeout */

	abstime.tv_sec = time(NULL) + timeout;

	(void) mutex_lock(&barrier.lock);

	(void) cond_timedwait(&barrier.cv, &barrier.lock,

	(void) cond_wait(&barrier.cv, &barrier.lock);

	/*
	 * If sighup received, change timeout to 0 so the daemon is
	 * shut down at the first possible moment
	 */
	if (sighup_received)

	/*
	 * If timeout is negative, never shutdown the daemon
	 */

	/*
	 * Check for ongoing/pending activity
	 */
	is_active = (barrier.thr_count || barrier.wanted ||
	    (dr_req_list->n_req != 0));

	abstime.tv_sec = time(NULL) + timeout;

	/*
	 * If idletime is less than timeout, continue to wait
	 */
	idletime = time(NULL) - barrier.last_update;
	if (idletime < timeout) {
		abstime.tv_sec = barrier.last_update + timeout;

	(void) script_main_fini();

	rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n"));
/*
 * Code related to polling client pid's
 * Not declared as static so that we can find this structure easily
 *
 * NOTE(review): the struct declaration and most members (pids, refcnt,
 * fds, n_pids, n_max_pids, signaled) are not visible in this excerpt.
 */
thread_t poll_tid;	/* poll thread id */
cond_t cv;		/* the associated lock is rcm_req_lock */
/*
 * Linear scan mapping an initiator pid to its slot in polllist.pids[].
 *
 * NOTE(review): the return statements and closing braces are not
 * visible in this excerpt.
 */
find_pid_index(pid_t pid)
	for (i = 0; i < polllist.n_pids; i++) {
		if (polllist.pids[i] == pid) {
1362 const int n_chunk
= 10;
1365 int index
= polllist
.n_pids
;
1367 if (polllist
.n_pids
< polllist
.n_max_pids
) {
1372 if (polllist
.n_max_pids
== 0) {
1374 polllist
.pids
= s_calloc(n_max
, sizeof (pid_t
));
1375 polllist
.refcnt
= s_calloc(n_max
, sizeof (int));
1376 polllist
.fds
= s_calloc(n_max
, sizeof (struct pollfd
));
1378 n_max
= polllist
.n_max_pids
+ n_chunk
;
1379 polllist
.pids
= s_realloc(polllist
.pids
,
1380 n_max
* sizeof (pid_t
));
1381 polllist
.refcnt
= s_realloc(polllist
.refcnt
,
1382 n_max
* sizeof (int));
1383 polllist
.fds
= s_realloc(polllist
.fds
,
1384 n_max
* sizeof (struct pollfd
));
1386 polllist
.n_max_pids
= n_max
;
/*
 * rcm_req_lock must be held
 *
 * Begin watching DR initiator 'pid': open its /proc "as" file so the
 * poll thread notices when the process exits. Reference-counted per
 * pid. NOTE(review): return lines and some braces are not visible.
 */
add_to_polling_list(pid_t pid)
	char procfile[MAXPATHLEN];

	/* pid 0 means "no initiator to watch" */
	if (pid == (pid_t)0)

	rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since poll thread may possibly be using polllist.fds[] and
	 * polllist.n_pids. As an optimization, first check if the pid
	 * is already in the polllist. If it is, there is no need to
	 * stop the poll thread. Just increment the pid reference count
	 */
	index = find_pid_index(pid);

	polllist.refcnt[index]++;

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock. So find the index again.
	 */
	index = find_pid_index(pid);

	polllist.refcnt[index]++;

	/* open /proc/<pid>/as; poll on this fd detects initiator death */
	(void) sprintf(procfile, "/proc/%ld/as", pid);
	if ((fd = open(procfile, O_RDONLY)) == -1) {
		rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"),
		    procfile, strerror(errno));

	/*
	 * add pid to polllist
	 */
	index = get_pid_index();
	polllist.pids[index] = pid;
	polllist.refcnt[index] = 1;
	polllist.fds[index].fd = fd;
	polllist.fds[index].events = 0;
	polllist.fds[index].revents = 0;

	rcm_log_message(RCM_DEBUG, "add pid %ld at index %ld\n", pid, index);

	start_polling_thread();
/*
 * rcm_req_lock must be held
 *
 * Drop one reference on watched pid; when the count reaches zero,
 * close its /proc fd and compact the parallel arrays.
 * NOTE(review): return lines and some braces are not visible here.
 */
remove_from_polling_list(pid_t pid)
	/* pid 0 was never added */
	if (pid == (pid_t)0)

	rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid);

	/*
	 * Need to stop the poll thread before manipulating the polllist
	 * since poll thread may possibly be using polllist.fds[] and
	 * polllist.n_pids. As an optimization, first check the pid
	 * reference count. If the pid reference count is greater than 1
	 * there is no need to stop the polling thread.
	 */
	index = find_pid_index(pid);

		/* pid not found: log and bail */
		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);

	/*
	 * decrement the pid refcnt
	 */
	if (polllist.refcnt[index] > 1) {
		polllist.refcnt[index]--;

	stop_polling_thread();

	/*
	 * In an attempt to stop the poll thread we may have released
	 * and reacquired rcm_req_lock. So find the index again.
	 */
	index = find_pid_index(pid);

		rcm_log_message(RCM_NOTICE,
		    gettext("error removing pid %ld from polling list\n"), pid);

	if (--polllist.refcnt[index] > 0)

	/*
	 * refcnt down to zero, delete pid from polling list
	 */
	(void) close(polllist.fds[index].fd);

	/* compact the parallel arrays over the removed slot */
	for (i = index; i < polllist.n_pids; i++) {
		polllist.pids[i] = polllist.pids[i + 1];
		polllist.refcnt[i] = polllist.refcnt[i + 1];
		bcopy(&polllist.fds[i + 1], &polllist.fds[i],
		    sizeof (struct pollfd));

	rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index);

	start_polling_thread();
	/* mark the poll thread as not running (sentinel value) */
	polllist.poll_tid = (thread_t)-1;
/*
 * Called by the poll thread on its way out: clears poll_tid back to the
 * "not running" sentinel and wakes any thread blocked in
 * stop_polling_thread().
 */
cleanup_poll_thread()
	(void) mutex_lock(&rcm_req_lock);
	/* only clear state if we are still the registered poll thread */
	if (polllist.poll_tid == thr_self()) {
		rcm_log_message(RCM_TRACE2,
		    "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids);
		polllist.poll_tid = (thread_t)-1;
		(void) cond_broadcast(&polllist.cv);
	(void) mutex_unlock(&rcm_req_lock);
/*
 * Poll-thread body: blocks in poll(2) on the /proc "as" fds of all DR
 * initiators; returns when an initiator exits or on SIGUSR1 from
 * stop_polling_thread().
 *
 * NOTE(review): the function signature, locals, and return are not
 * visible in this excerpt.
 */
	rcm_log_message(RCM_TRACE2, "poll thread started. n_pids = %d\n",

	/*
	 * Unblock SIGUSR1 to allow polling thread to be killed
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);

	/* infinite timeout: only an fd event or SIGUSR1 wakes us */
	(void) poll(polllist.fds, polllist.n_pids, (time_t)-1);

	/*
	 * block SIGUSR1 to avoid being killed while holding a lock
	 */
	(void) sigemptyset(&mask);
	(void) sigaddset(&mask, SIGUSR1);
	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);

	rcm_log_message(RCM_TRACE2, "returned from poll()\n");

	cleanup_poll_thread();

	/* nudge the barrier so the cleanup/idle logic re-evaluates */
	(void) mutex_lock(&barrier.lock);
	(void) cond_broadcast(&barrier.cv);
	(void) mutex_unlock(&barrier.lock);
/*
 * rcm_req_lock must be held
 *
 * Launch the detached poll thread, unless the daemon is not in normal
 * state, a poll thread is already running, or there is nothing to poll.
 * NOTE(review): return lines and the error branch framing are not
 * visible in this excerpt.
 */
start_polling_thread()
	if (rcmd_get_state() != RCMD_NORMAL)

	/* already running, or nothing to watch */
	if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0)

	if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED,
	    &polllist.poll_tid)) == 0)
		polllist.signaled = 0;

		rcm_log_message(RCM_ERROR,
		    gettext("failed to create polling thread: %s\n"),
/*
 * rcm_req_lock must be held
 *
 * Signal the poll thread with SIGUSR1 (at most once) and wait on
 * polllist.cv until cleanup_poll_thread() reports it has exited.
 * NOTE(review): the cond_wait releases and reacquires rcm_req_lock,
 * which callers must account for (see add/remove_from_polling_list).
 */
stop_polling_thread()
	while (polllist.poll_tid != (thread_t)-1) {
		if (polllist.signaled == 0) {
			if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0)
				polllist.signaled = 1;

			/*
			 * thr_kill shouldn't have failed since the
			 * poll thread id and the signal are valid.
			 * So log an error. Since when thr_kill
			 * fails no signal is sent (as per man page),
			 * the cond_wait below will wait until the
			 * the poll thread exits by some other means.
			 * The poll thread, for example, exits on its
			 * own when any DR initiator process that it
			 * is currently polling exits.
			 */
			rcm_log_message(RCM_ERROR,
			    /* NOTE(review): gettext( wrapper line missing */
			    "fail to kill polling thread %d: %s\n"),
			    polllist.poll_tid, strerror(err));

		(void) cond_wait(&polllist.cv, &rcm_req_lock);